1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H
33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H
43d14c5d2SYehuda Sadeh
5a02a946dSIlya Dryomov #include <linux/bitrev.h>
63d14c5d2SYehuda Sadeh #include <linux/completion.h>
73d14c5d2SYehuda Sadeh #include <linux/kref.h>
83d14c5d2SYehuda Sadeh #include <linux/mempool.h>
93d14c5d2SYehuda Sadeh #include <linux/rbtree.h>
1002113a0fSElena Reshetova #include <linux/refcount.h>
1197e27aaaSXiubo Li #include <linux/ktime.h>
123d14c5d2SYehuda Sadeh
136c4a1915SAlex Elder #include <linux/ceph/types.h>
146c4a1915SAlex Elder #include <linux/ceph/osdmap.h>
156c4a1915SAlex Elder #include <linux/ceph/messenger.h>
16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h>
176c4a1915SAlex Elder #include <linux/ceph/auth.h>
18c885837fSAlex Elder #include <linux/ceph/pagelist.h>
193d14c5d2SYehuda Sadeh
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * Signature of an OSD request completion callback, e.g. the one used for
 * async writepages.  It is handed a pointer to a struct ceph_osd_request.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *req);
293d14c5d2SYehuda Sadeh
/*
 * OSD number used for the "homeless" placeholder OSD.
 * NOTE(review): presumably the o_osd value of osdc->homeless_osd; confirm
 * against osd_client.c.
 *
 * Parenthesized so the negative literal stays a single operand wherever the
 * macro is expanded.
 */
#define CEPH_HOMELESS_OSD	(-1)
3163244fa1SIlya Dryomov
3208b8a044SJeff Layton /*
33a679e50fSJeff Layton * A single extent in a SPARSE_READ reply.
34a679e50fSJeff Layton *
35a679e50fSJeff Layton * Note that these come from the OSD as little-endian values. On BE arches,
36a679e50fSJeff Layton * we convert them in-place after receipt.
37a679e50fSJeff Layton */
38a679e50fSJeff Layton struct ceph_sparse_extent {
39a679e50fSJeff Layton u64 off;
40a679e50fSJeff Layton u64 len;
41a679e50fSJeff Layton } __packed;
42a679e50fSJeff Layton
/*
 * Values taken by the sparse read state machine
 * (see sr_state in struct ceph_sparse_read).
 */
enum ceph_sparse_read_state {
	CEPH_SPARSE_READ_HDR = 0,
	CEPH_SPARSE_READ_EXTENTS,
	CEPH_SPARSE_READ_DATA_LEN,
	CEPH_SPARSE_READ_DATA_PRE,
	CEPH_SPARSE_READ_DATA,
};
51f628d799SJeff Layton
52f628d799SJeff Layton /*
53f628d799SJeff Layton * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
54f628d799SJeff Layton * 64-bit offset/length pairs, and then all of the actual file data
55f628d799SJeff Layton * concatenated after it (sans holes).
56f628d799SJeff Layton *
57f628d799SJeff Layton * Unfortunately, we don't know how long the extent array is until we've
58f628d799SJeff Layton * started reading the data section of the reply. The caller should send down
59f628d799SJeff Layton * a destination buffer for the array, but we'll alloc one if it's too small
60f628d799SJeff Layton * or if the caller doesn't.
61f628d799SJeff Layton */
62f628d799SJeff Layton struct ceph_sparse_read {
63f628d799SJeff Layton enum ceph_sparse_read_state sr_state; /* state machine state */
64f628d799SJeff Layton u64 sr_req_off; /* orig request offset */
65f628d799SJeff Layton u64 sr_req_len; /* orig request length */
66f628d799SJeff Layton u64 sr_pos; /* current pos in buffer */
67f628d799SJeff Layton int sr_index; /* current extent index */
68*7d7046a6SXiubo Li u32 sr_datalen; /* length of actual data */
69f628d799SJeff Layton u32 sr_count; /* extent count in reply */
70f628d799SJeff Layton int sr_ext_len; /* length of extent array */
71f628d799SJeff Layton struct ceph_sparse_extent *sr_extent; /* extent array */
72f628d799SJeff Layton };
73f628d799SJeff Layton
74a679e50fSJeff Layton /*
7508b8a044SJeff Layton * A given osd we're communicating with.
7608b8a044SJeff Layton *
7708b8a044SJeff Layton * Note that the o_requests tree can be searched while holding the "lock" mutex
7808b8a044SJeff Layton * or the "o_requests_lock" spinlock. Insertion or removal requires both!
7908b8a044SJeff Layton */
803d14c5d2SYehuda Sadeh struct ceph_osd {
8102113a0fSElena Reshetova refcount_t o_ref;
82f628d799SJeff Layton int o_sparse_op_idx;
833d14c5d2SYehuda Sadeh struct ceph_osd_client *o_osdc;
843d14c5d2SYehuda Sadeh int o_osd;
853d14c5d2SYehuda Sadeh int o_incarnation;
863d14c5d2SYehuda Sadeh struct rb_node o_node;
873d14c5d2SYehuda Sadeh struct ceph_connection o_con;
8808b8a044SJeff Layton spinlock_t o_requests_lock;
895aea3dcdSIlya Dryomov struct rb_root o_requests;
90922dab61SIlya Dryomov struct rb_root o_linger_requests;
91a02a946dSIlya Dryomov struct rb_root o_backoff_mappings;
92a02a946dSIlya Dryomov struct rb_root o_backoffs_by_id;
933d14c5d2SYehuda Sadeh struct list_head o_osd_lru;
946c4a1915SAlex Elder struct ceph_auth_handshake o_auth;
953d14c5d2SYehuda Sadeh unsigned long lru_ttl;
963d14c5d2SYehuda Sadeh struct list_head o_keepalive_item;
975aea3dcdSIlya Dryomov struct mutex lock;
98f628d799SJeff Layton struct ceph_sparse_read o_sparse_read;
993d14c5d2SYehuda Sadeh };
1003d14c5d2SYehuda Sadeh
/*
 * Op-count constants for OSD requests.
 * NOTE(review): presumably the op count covered by the request slab and the
 * upper bound on ops per request; confirm against osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
1031b83bef2SSage Weil
/*
 * Discriminator stored in the "type" field of struct ceph_osd_data.
 * The BIO value only exists when CONFIG_BLOCK is set, so the numeric
 * values of the entries after it depend on the configuration.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
	CEPH_OSD_DATA_TYPE_ITER,
};
1142ac2b7a6SAlex Elder
1152794a82aSAlex Elder struct ceph_osd_data {
1162ac2b7a6SAlex Elder enum ceph_osd_data_type type;
1172ac2b7a6SAlex Elder union {
1182794a82aSAlex Elder struct {
1192794a82aSAlex Elder struct page **pages;
120e0c59487SAlex Elder u64 length;
1212794a82aSAlex Elder u32 alignment;
1222794a82aSAlex Elder bool pages_from_pool;
1232794a82aSAlex Elder bool own_pages;
1242794a82aSAlex Elder };
1259a5e6d09SAlex Elder struct ceph_pagelist *pagelist;
1262794a82aSAlex Elder #ifdef CONFIG_BLOCK
127fdce58ccSAlex Elder struct {
1285359a17dSIlya Dryomov struct ceph_bio_iter bio_pos;
1295359a17dSIlya Dryomov u32 bio_length;
130fdce58ccSAlex Elder };
1312794a82aSAlex Elder #endif /* CONFIG_BLOCK */
1320010f705SIlya Dryomov struct {
133b9e281c2SIlya Dryomov struct ceph_bvec_iter bvec_pos;
1340010f705SIlya Dryomov u32 num_bvecs;
1350010f705SIlya Dryomov };
136dee0c5f8SJeff Layton struct iov_iter iter;
1372794a82aSAlex Elder };
1382794a82aSAlex Elder };
1392794a82aSAlex Elder
14079528734SAlex Elder struct ceph_osd_req_op {
14179528734SAlex Elder u16 op; /* CEPH_OSD_OP_* */
1427b25bf5fSIlya Dryomov u32 flags; /* CEPH_OSD_OP_FLAG_* */
143de2aa102SIlya Dryomov u32 indata_len; /* request */
1447665d85bSYan, Zheng u32 outdata_len; /* reply */
1457665d85bSYan, Zheng s32 rval;
1467665d85bSYan, Zheng
14779528734SAlex Elder union {
14849719778SAlex Elder struct ceph_osd_data raw_data_in;
14979528734SAlex Elder struct {
15079528734SAlex Elder u64 offset, length;
15179528734SAlex Elder u64 truncate_size;
15279528734SAlex Elder u32 truncate_seq;
153a679e50fSJeff Layton int sparse_ext_cnt;
154a679e50fSJeff Layton struct ceph_sparse_extent *sparse_ext;
1555476492fSAlex Elder struct ceph_osd_data osd_data;
15679528734SAlex Elder } extent;
15779528734SAlex Elder struct {
158d7d5a007SIlya Dryomov u32 name_len;
159d7d5a007SIlya Dryomov u32 value_len;
160d74b50beSYan, Zheng __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
161d74b50beSYan, Zheng __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
162d74b50beSYan, Zheng struct ceph_osd_data osd_data;
163d74b50beSYan, Zheng } xattr;
164d74b50beSYan, Zheng struct {
16579528734SAlex Elder const char *class_name;
16679528734SAlex Elder const char *method_name;
1675476492fSAlex Elder struct ceph_osd_data request_info;
16804017e29SAlex Elder struct ceph_osd_data request_data;
1695476492fSAlex Elder struct ceph_osd_data response_data;
17079528734SAlex Elder __u8 class_len;
17179528734SAlex Elder __u8 method_len;
172bb873b53SIlya Dryomov u32 indata_len;
17379528734SAlex Elder } cls;
17479528734SAlex Elder struct {
17579528734SAlex Elder u64 cookie;
176922dab61SIlya Dryomov __u8 op; /* CEPH_OSD_WATCH_OP_ */
177922dab61SIlya Dryomov u32 gen;
17879528734SAlex Elder } watch;
179c647b8a8SIlya Dryomov struct {
180922dab61SIlya Dryomov struct ceph_osd_data request_data;
181922dab61SIlya Dryomov } notify_ack;
182922dab61SIlya Dryomov struct {
18319079203SIlya Dryomov u64 cookie;
18419079203SIlya Dryomov struct ceph_osd_data request_data;
18519079203SIlya Dryomov struct ceph_osd_data response_data;
18619079203SIlya Dryomov } notify;
18719079203SIlya Dryomov struct {
188a4ed38d7SDouglas Fuller struct ceph_osd_data response_data;
189a4ed38d7SDouglas Fuller } list_watchers;
190a4ed38d7SDouglas Fuller struct {
191c647b8a8SIlya Dryomov u64 expected_object_size;
192c647b8a8SIlya Dryomov u64 expected_write_size;
193d3798accSIlya Dryomov u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
194c647b8a8SIlya Dryomov } alloc_hint;
19523ddf9beSLuis Henriques struct {
19623ddf9beSLuis Henriques u64 snapid;
19723ddf9beSLuis Henriques u64 src_version;
19823ddf9beSLuis Henriques u8 flags;
19923ddf9beSLuis Henriques u32 src_fadvise_flags;
20023ddf9beSLuis Henriques struct ceph_osd_data osd_data;
20123ddf9beSLuis Henriques } copy_from;
20269dd3b39SJeff Layton struct {
20369dd3b39SJeff Layton u64 ver;
20469dd3b39SJeff Layton } assert_ver;
20579528734SAlex Elder };
20679528734SAlex Elder };
20779528734SAlex Elder
20863244fa1SIlya Dryomov struct ceph_osd_request_target {
20963244fa1SIlya Dryomov struct ceph_object_id base_oid;
21063244fa1SIlya Dryomov struct ceph_object_locator base_oloc;
21163244fa1SIlya Dryomov struct ceph_object_id target_oid;
21263244fa1SIlya Dryomov struct ceph_object_locator target_oloc;
21363244fa1SIlya Dryomov
214dc98ff72SIlya Dryomov struct ceph_pg pgid; /* last raw pg we mapped to */
215dc98ff72SIlya Dryomov struct ceph_spg spgid; /* last actual spg we mapped to */
21663244fa1SIlya Dryomov u32 pg_num;
21763244fa1SIlya Dryomov u32 pg_num_mask;
21863244fa1SIlya Dryomov struct ceph_osds acting;
21963244fa1SIlya Dryomov struct ceph_osds up;
22063244fa1SIlya Dryomov int size;
22163244fa1SIlya Dryomov int min_size;
22263244fa1SIlya Dryomov bool sort_bitwise;
223ae78dd81SIlya Dryomov bool recovery_deletes;
22463244fa1SIlya Dryomov
22563244fa1SIlya Dryomov unsigned int flags; /* CEPH_OSD_FLAG_* */
226117d96a0SIlya Dryomov bool used_replica;
22763244fa1SIlya Dryomov bool paused;
22863244fa1SIlya Dryomov
22904c7d789SIlya Dryomov u32 epoch;
230dc93e0e2SIlya Dryomov u32 last_force_resend;
231dc93e0e2SIlya Dryomov
23263244fa1SIlya Dryomov int osd;
23363244fa1SIlya Dryomov };
23463244fa1SIlya Dryomov
2353d14c5d2SYehuda Sadeh /* an in-flight request */
2363d14c5d2SYehuda Sadeh struct ceph_osd_request {
2373d14c5d2SYehuda Sadeh u64 r_tid; /* unique for this client */
2383d14c5d2SYehuda Sadeh struct rb_node r_node;
2394609245eSIlya Dryomov struct rb_node r_mc_node; /* map check */
24088bc1922SIlya Dryomov struct work_struct r_complete_work;
2413d14c5d2SYehuda Sadeh struct ceph_osd *r_osd;
242a66dd383SIlya Dryomov
243a66dd383SIlya Dryomov struct ceph_osd_request_target r_t;
244a66dd383SIlya Dryomov #define r_base_oid r_t.base_oid
245a66dd383SIlya Dryomov #define r_base_oloc r_t.base_oloc
246a66dd383SIlya Dryomov #define r_flags r_t.flags
2473d14c5d2SYehuda Sadeh
2483d14c5d2SYehuda Sadeh struct ceph_msg *r_request, *r_reply;
2493d14c5d2SYehuda Sadeh u32 r_sent; /* >0 if r_request is sending/sent */
2501b83bef2SSage Weil
25179528734SAlex Elder /* request osd ops array */
25279528734SAlex Elder unsigned int r_num_ops;
25379528734SAlex Elder
2541b83bef2SSage Weil int r_result;
2553d14c5d2SYehuda Sadeh
2563d14c5d2SYehuda Sadeh struct ceph_osd_client *r_osdc;
2573d14c5d2SYehuda Sadeh struct kref r_kref;
2583d14c5d2SYehuda Sadeh bool r_mempool;
25969dd3b39SJeff Layton bool r_linger; /* don't resend on failure */
260b18b9550SIlya Dryomov struct completion r_completion; /* private to osd_client.c */
26126be8808SAlex Elder ceph_osdc_callback_t r_callback;
2623d14c5d2SYehuda Sadeh
2633d14c5d2SYehuda Sadeh struct inode *r_inode; /* for use by callbacks */
26494e85771SIlya Dryomov struct list_head r_private_item; /* ditto */
2653d14c5d2SYehuda Sadeh void *r_priv; /* ditto */
2663d14c5d2SYehuda Sadeh
267bb873b53SIlya Dryomov /* set by submitter */
268bb873b53SIlya Dryomov u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */
269bb873b53SIlya Dryomov struct ceph_snap_context *r_snapc; /* for writes */
270fac02ddfSArnd Bergmann struct timespec64 r_mtime; /* ditto */
271bb873b53SIlya Dryomov u64 r_data_offset; /* ditto */
2723d14c5d2SYehuda Sadeh
273bb873b53SIlya Dryomov /* internal */
27469dd3b39SJeff Layton u64 r_version; /* data version sent in reply */
275bb873b53SIlya Dryomov unsigned long r_stamp; /* jiffies, send or check time */
2767cc5e38fSIlya Dryomov unsigned long r_start_stamp; /* jiffies */
27797e27aaaSXiubo Li ktime_t r_start_latency; /* ktime_t */
27897e27aaaSXiubo Li ktime_t r_end_latency; /* ktime_t */
279bb873b53SIlya Dryomov int r_attempts;
2804609245eSIlya Dryomov u32 r_map_dne_bound;
2813f1af42aSIlya Dryomov
2823f1af42aSIlya Dryomov struct ceph_osd_req_op r_ops[];
2833d14c5d2SYehuda Sadeh };
2843d14c5d2SYehuda Sadeh
285205ee118SIlya Dryomov struct ceph_request_redirect {
286205ee118SIlya Dryomov struct ceph_object_locator oloc;
287205ee118SIlya Dryomov };
288205ee118SIlya Dryomov
2898cb441c0SIlya Dryomov /*
2908cb441c0SIlya Dryomov * osd request identifier
2918cb441c0SIlya Dryomov *
2928cb441c0SIlya Dryomov * caller name + incarnation# + tid to unique identify this request
2938cb441c0SIlya Dryomov */
2948cb441c0SIlya Dryomov struct ceph_osd_reqid {
2958cb441c0SIlya Dryomov struct ceph_entity_name name;
2968cb441c0SIlya Dryomov __le64 tid;
2978cb441c0SIlya Dryomov __le32 inc;
2988cb441c0SIlya Dryomov } __packed;
2998cb441c0SIlya Dryomov
3008cb441c0SIlya Dryomov struct ceph_blkin_trace_info {
3018cb441c0SIlya Dryomov __le64 trace_id;
3028cb441c0SIlya Dryomov __le64 span_id;
3038cb441c0SIlya Dryomov __le64 parent_span_id;
3048cb441c0SIlya Dryomov } __packed;
3058cb441c0SIlya Dryomov
306922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
307922dab61SIlya Dryomov u64 notifier_id, void *data, size_t data_len);
308922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
309a40c4f10SYehuda Sadeh
310922dab61SIlya Dryomov struct ceph_osd_linger_request {
311922dab61SIlya Dryomov struct ceph_osd_client *osdc;
312922dab61SIlya Dryomov u64 linger_id;
313922dab61SIlya Dryomov bool committed;
31419079203SIlya Dryomov bool is_watch; /* watch or notify */
315922dab61SIlya Dryomov
316922dab61SIlya Dryomov struct ceph_osd *osd;
317922dab61SIlya Dryomov struct ceph_osd_request *reg_req;
318922dab61SIlya Dryomov struct ceph_osd_request *ping_req;
319922dab61SIlya Dryomov unsigned long ping_sent;
320b07d3c4bSIlya Dryomov unsigned long watch_valid_thru;
321b07d3c4bSIlya Dryomov struct list_head pending_lworks;
322922dab61SIlya Dryomov
323922dab61SIlya Dryomov struct ceph_osd_request_target t;
3244609245eSIlya Dryomov u32 map_dne_bound;
325922dab61SIlya Dryomov
326fac02ddfSArnd Bergmann struct timespec64 mtime;
327922dab61SIlya Dryomov
328922dab61SIlya Dryomov struct kref kref;
329922dab61SIlya Dryomov struct mutex lock;
330922dab61SIlya Dryomov struct rb_node node; /* osd */
331922dab61SIlya Dryomov struct rb_node osdc_node; /* osdc */
3324609245eSIlya Dryomov struct rb_node mc_node; /* map check */
333922dab61SIlya Dryomov struct list_head scan_item;
334922dab61SIlya Dryomov
335922dab61SIlya Dryomov struct completion reg_commit_wait;
33619079203SIlya Dryomov struct completion notify_finish_wait;
337922dab61SIlya Dryomov int reg_commit_error;
33819079203SIlya Dryomov int notify_finish_error;
339922dab61SIlya Dryomov int last_error;
340922dab61SIlya Dryomov
341922dab61SIlya Dryomov u32 register_gen;
34219079203SIlya Dryomov u64 notify_id;
343922dab61SIlya Dryomov
344922dab61SIlya Dryomov rados_watchcb2_t wcb;
345922dab61SIlya Dryomov rados_watcherrcb_t errcb;
346922dab61SIlya Dryomov void *data;
34719079203SIlya Dryomov
34875dbb685SIlya Dryomov struct ceph_pagelist *request_pl;
34975dbb685SIlya Dryomov struct page **notify_id_pages;
35075dbb685SIlya Dryomov
35119079203SIlya Dryomov struct page ***preply_pages;
35219079203SIlya Dryomov size_t *preply_len;
353a40c4f10SYehuda Sadeh };
354a40c4f10SYehuda Sadeh
355a4ed38d7SDouglas Fuller struct ceph_watch_item {
356a4ed38d7SDouglas Fuller struct ceph_entity_name name;
357a4ed38d7SDouglas Fuller u64 cookie;
358a4ed38d7SDouglas Fuller struct ceph_entity_addr addr;
359a4ed38d7SDouglas Fuller };
360a4ed38d7SDouglas Fuller
361a02a946dSIlya Dryomov struct ceph_spg_mapping {
362a02a946dSIlya Dryomov struct rb_node node;
363a02a946dSIlya Dryomov struct ceph_spg spgid;
364a02a946dSIlya Dryomov
365a02a946dSIlya Dryomov struct rb_root backoffs;
366a02a946dSIlya Dryomov };
367a02a946dSIlya Dryomov
368a02a946dSIlya Dryomov struct ceph_hobject_id {
369a02a946dSIlya Dryomov void *key;
370a02a946dSIlya Dryomov size_t key_len;
371a02a946dSIlya Dryomov void *oid;
372a02a946dSIlya Dryomov size_t oid_len;
373a02a946dSIlya Dryomov u64 snapid;
374a02a946dSIlya Dryomov u32 hash;
375a02a946dSIlya Dryomov u8 is_max;
376a02a946dSIlya Dryomov void *nspace;
377a02a946dSIlya Dryomov size_t nspace_len;
378a02a946dSIlya Dryomov s64 pool;
379a02a946dSIlya Dryomov
380a02a946dSIlya Dryomov /* cache */
381a02a946dSIlya Dryomov u32 hash_reverse_bits;
382a02a946dSIlya Dryomov };
383a02a946dSIlya Dryomov
ceph_hoid_build_hash_cache(struct ceph_hobject_id * hoid)384a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
385a02a946dSIlya Dryomov {
386a02a946dSIlya Dryomov hoid->hash_reverse_bits = bitrev32(hoid->hash);
387a02a946dSIlya Dryomov }
388a02a946dSIlya Dryomov
389a02a946dSIlya Dryomov /*
390a02a946dSIlya Dryomov * PG-wide backoff: [begin, end)
391a02a946dSIlya Dryomov * per-object backoff: begin == end
392a02a946dSIlya Dryomov */
393a02a946dSIlya Dryomov struct ceph_osd_backoff {
394a02a946dSIlya Dryomov struct rb_node spg_node;
395a02a946dSIlya Dryomov struct rb_node id_node;
396a02a946dSIlya Dryomov
397a02a946dSIlya Dryomov struct ceph_spg spgid;
398a02a946dSIlya Dryomov u64 id;
399a02a946dSIlya Dryomov struct ceph_hobject_id *begin;
400a02a946dSIlya Dryomov struct ceph_hobject_id *end;
401a02a946dSIlya Dryomov };
402a02a946dSIlya Dryomov
/*
 * NOTE(review): presumably the first value handed out for linger ids (see
 * last_linger_id in struct ceph_osd_client); confirm against osd_client.c.
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL
404264048afSIlya Dryomov
4053d14c5d2SYehuda Sadeh struct ceph_osd_client {
4063d14c5d2SYehuda Sadeh struct ceph_client *client;
4073d14c5d2SYehuda Sadeh
4083d14c5d2SYehuda Sadeh struct ceph_osdmap *osdmap; /* current map */
4095aea3dcdSIlya Dryomov struct rw_semaphore lock;
4103d14c5d2SYehuda Sadeh
4113d14c5d2SYehuda Sadeh struct rb_root osds; /* osds */
4123d14c5d2SYehuda Sadeh struct list_head osd_lru; /* idle osds */
4139dd2845cSIlya Dryomov spinlock_t osd_lru_lock;
41458eb7932SJeff Layton u32 epoch_barrier;
4155aea3dcdSIlya Dryomov struct ceph_osd homeless_osd;
4165aea3dcdSIlya Dryomov atomic64_t last_tid; /* tid of last request */
417922dab61SIlya Dryomov u64 last_linger_id;
418922dab61SIlya Dryomov struct rb_root linger_requests; /* lingering requests */
4194609245eSIlya Dryomov struct rb_root map_checks;
4204609245eSIlya Dryomov struct rb_root linger_map_checks;
4215aea3dcdSIlya Dryomov atomic_t num_requests;
4225aea3dcdSIlya Dryomov atomic_t num_homeless;
42366850df5SIlya Dryomov int abort_err;
4243d14c5d2SYehuda Sadeh struct delayed_work timeout_work;
4253d14c5d2SYehuda Sadeh struct delayed_work osds_timeout_work;
4263d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS
4273d14c5d2SYehuda Sadeh struct dentry *debugfs_file;
4283d14c5d2SYehuda Sadeh #endif
4293d14c5d2SYehuda Sadeh
4303d14c5d2SYehuda Sadeh mempool_t *req_mempool;
4313d14c5d2SYehuda Sadeh
4323d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op;
4333d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op_reply;
434a40c4f10SYehuda Sadeh
435a40c4f10SYehuda Sadeh struct workqueue_struct *notify_wq;
43688bc1922SIlya Dryomov struct workqueue_struct *completion_wq;
4373d14c5d2SYehuda Sadeh };
4383d14c5d2SYehuda Sadeh
ceph_osdmap_flag(struct ceph_osd_client * osdc,int flag)439b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
440b7ec35b3SIlya Dryomov {
441b7ec35b3SIlya Dryomov return osdc->osdmap->flags & flag;
442b7ec35b3SIlya Dryomov }
443b7ec35b3SIlya Dryomov
4445522ae0bSAlex Elder extern int ceph_osdc_setup(void);
4455522ae0bSAlex Elder extern void ceph_osdc_cleanup(void);
4465522ae0bSAlex Elder
4473d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc,
4483d14c5d2SYehuda Sadeh struct ceph_client *client);
4493d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
450120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
4513d14c5d2SYehuda Sadeh
4523d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
4533d14c5d2SYehuda Sadeh struct ceph_msg *msg);
4543d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
4553d14c5d2SYehuda Sadeh struct ceph_msg *msg);
45658eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
45766850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
4582cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
4593d14c5d2SYehuda Sadeh
/*
 * Evaluates to the address of field @fld inside union member @typ of op
 * number @whch of request @oreq, i.e. &oreq->r_ops[whch].typ.fld.
 *
 * @oreq and @whch are each evaluated exactly once.  BUG_ON() fires when
 * @whch is not smaller than the request's r_num_ops.
 */
#define osd_req_op_data(oreq, whch, typ, fld)				\
({									\
	struct ceph_osd_request *__oreq = (oreq);			\
	unsigned int __whch = (whch);					\
	BUG_ON(__whch >= __oreq->r_num_ops);				\
	&__oreq->r_ops[__whch].typ.fld;					\
})
4674cf3e6dfSIlya Dryomov
468042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
469144cba14SYan, Zheng unsigned int which, u16 opcode, u32 flags);
47049719778SAlex Elder
47149719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
47249719778SAlex Elder unsigned int which,
47349719778SAlex Elder struct page **pages, u64 length,
47449719778SAlex Elder u32 alignment, bool pages_from_pool,
47549719778SAlex Elder bool own_pages);
47649719778SAlex Elder
477c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
478c99d2d4aSAlex Elder unsigned int which, u16 opcode,
47933803f33SAlex Elder u64 offset, u64 length,
48033803f33SAlex Elder u64 truncate_size, u32 truncate_seq);
481c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
482c99d2d4aSAlex Elder unsigned int which, u64 length);
4832c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
4842c63f49aSYan, Zheng unsigned int which, u64 offset_inc);
485a4ce40a9SAlex Elder
486a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data(
487a4ce40a9SAlex Elder struct ceph_osd_request *osd_req,
488406e2c9fSAlex Elder unsigned int which);
489a4ce40a9SAlex Elder
490a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
491406e2c9fSAlex Elder unsigned int which,
492a4ce40a9SAlex Elder struct page **pages, u64 length,
493a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool,
494a4ce40a9SAlex Elder bool own_pages);
495a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
496406e2c9fSAlex Elder unsigned int which,
497a4ce40a9SAlex Elder struct ceph_pagelist *pagelist);
498a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK
4995359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
500406e2c9fSAlex Elder unsigned int which,
5015359a17dSIlya Dryomov struct ceph_bio_iter *bio_pos,
5025359a17dSIlya Dryomov u32 bio_length);
503a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */
5040010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
5050010f705SIlya Dryomov unsigned int which,
5060010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs,
5070010f705SIlya Dryomov u32 bytes);
508b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
509b9e281c2SIlya Dryomov unsigned int which,
510b9e281c2SIlya Dryomov struct ceph_bvec_iter *bvec_pos);
511dee0c5f8SJeff Layton void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
512dee0c5f8SJeff Layton unsigned int which, struct iov_iter *iter);
513a4ce40a9SAlex Elder
51404017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
51504017e29SAlex Elder unsigned int which,
51604017e29SAlex Elder struct ceph_pagelist *pagelist);
5176c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
5186c57b554SAlex Elder unsigned int which,
5196c57b554SAlex Elder struct page **pages, u64 length,
5206c57b554SAlex Elder u32 alignment, bool pages_from_pool,
5216c57b554SAlex Elder bool own_pages);
522b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
523b9e281c2SIlya Dryomov unsigned int which,
5240010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs,
5250010f705SIlya Dryomov u32 bytes);
526a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
527c99d2d4aSAlex Elder unsigned int which,
528a4ce40a9SAlex Elder struct page **pages, u64 length,
529a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool,
530a4ce40a9SAlex Elder bool own_pages);
53124639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
53204017e29SAlex Elder const char *class, const char *method);
533d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
534d74b50beSYan, Zheng u16 opcode, const char *name, const void *value,
535d74b50beSYan, Zheng size_t size, u8 cmp_op, u8 cmp_mode);
536c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
537c647b8a8SIlya Dryomov unsigned int which,
538c647b8a8SIlya Dryomov u64 expected_object_size,
539d3798accSIlya Dryomov u64 expected_write_size,
540d3798accSIlya Dryomov u32 flags);
541aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
542aca39d9eSLuís Henriques u64 src_snapid, u64 src_version,
543aca39d9eSLuís Henriques struct ceph_object_id *src_oid,
544aca39d9eSLuís Henriques struct ceph_object_locator *src_oloc,
545aca39d9eSLuís Henriques u32 src_fadvise_flags,
546aca39d9eSLuís Henriques u32 dst_fadvise_flags,
547aca39d9eSLuís Henriques u32 truncate_seq, u64 truncate_size,
548aca39d9eSLuís Henriques u8 copy_from_flags);
54933803f33SAlex Elder
5503d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
5513d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc,
552acead002SAlex Elder unsigned int num_ops,
5533d14c5d2SYehuda Sadeh bool use_mempool,
55454a54007SAlex Elder gfp_t gfp_flags);
55513d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
5563d14c5d2SYehuda Sadeh
5573d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
5583d14c5d2SYehuda Sadeh struct ceph_file_layout *layout,
5593d14c5d2SYehuda Sadeh struct ceph_vino vino,
560acead002SAlex Elder u64 offset, u64 *len,
561715e4cd4SYan, Zheng unsigned int which, int num_ops,
562715e4cd4SYan, Zheng int opcode, int flags,
5633d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc,
564acead002SAlex Elder u32 truncate_seq, u64 truncate_size,
565153e5167SAlex Elder bool use_mempool);
5663d14c5d2SYehuda Sadeh
int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);

/*
 * Size of the extent array preallocated for a sparse read.  It is only a
 * starting value: when the OSD sends back more extents than this, the
 * receiver reallocates the array.
 */
#define CEPH_SPARSE_EXT_ARRAY_INITIAL	16

/*
 * Convenience wrapper: calls __ceph_alloc_sparse_ext_map() on @op with a
 * count of CEPH_SPARSE_EXT_ARRAY_INITIAL and passes its return value
 * through unchanged.
 */
static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
{
	const int initial_cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;

	return __ceph_alloc_sparse_ext_map(op, initial_cnt);
}
580a679e50fSJeff Layton
5819e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req);
5829e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req);
5833d14c5d2SYehuda Sadeh
584a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc,
585a8af0d68SJeff Layton struct ceph_osd_request *req);
586c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
5873d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
5883d14c5d2SYehuda Sadeh struct ceph_osd_request *req);
5893d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
5903d14c5d2SYehuda Sadeh
591dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
5927cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
593dd935f44SJosh Durgin
594428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc,
595428a7158SDouglas Fuller struct ceph_object_id *oid,
596428a7158SDouglas Fuller struct ceph_object_locator *oloc,
597428a7158SDouglas Fuller const char *class, const char *method,
598428a7158SDouglas Fuller unsigned int flags,
599428a7158SDouglas Fuller struct page *req_page, size_t req_len,
60068ada915SIlya Dryomov struct page **resp_pages, size_t *resp_len);
601428a7158SDouglas Fuller
602922dab61SIlya Dryomov /* watch/notify */
603922dab61SIlya Dryomov struct ceph_osd_linger_request *
604922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc,
605922dab61SIlya Dryomov struct ceph_object_id *oid,
606922dab61SIlya Dryomov struct ceph_object_locator *oloc,
607922dab61SIlya Dryomov rados_watchcb2_t wcb,
608922dab61SIlya Dryomov rados_watcherrcb_t errcb,
609922dab61SIlya Dryomov void *data);
610922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
611922dab61SIlya Dryomov struct ceph_osd_linger_request *lreq);
612922dab61SIlya Dryomov
613922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
614922dab61SIlya Dryomov struct ceph_object_id *oid,
615922dab61SIlya Dryomov struct ceph_object_locator *oloc,
616922dab61SIlya Dryomov u64 notify_id,
617922dab61SIlya Dryomov u64 cookie,
618922dab61SIlya Dryomov void *payload,
6196d54228fSIlya Dryomov u32 payload_len);
62019079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc,
62119079203SIlya Dryomov struct ceph_object_id *oid,
62219079203SIlya Dryomov struct ceph_object_locator *oloc,
62319079203SIlya Dryomov void *payload,
6246d54228fSIlya Dryomov u32 payload_len,
62519079203SIlya Dryomov u32 timeout,
62619079203SIlya Dryomov struct page ***preply_pages,
62719079203SIlya Dryomov size_t *preply_len);
628b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
629b07d3c4bSIlya Dryomov struct ceph_osd_linger_request *lreq);
630a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
631a4ed38d7SDouglas Fuller struct ceph_object_id *oid,
632a4ed38d7SDouglas Fuller struct ceph_object_locator *oloc,
633a4ed38d7SDouglas Fuller struct ceph_watch_item **watchers,
634a4ed38d7SDouglas Fuller u32 *num_watchers);
6353d14c5d2SYehuda Sadeh
636a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */
ceph_sparse_ext_map_end(struct ceph_osd_req_op * op)637a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
638a679e50fSJeff Layton {
639a679e50fSJeff Layton struct ceph_sparse_extent *ext;
640a679e50fSJeff Layton
641a679e50fSJeff Layton /* No extents? No data */
642a679e50fSJeff Layton if (op->extent.sparse_ext_cnt == 0)
643a679e50fSJeff Layton return 0;
644a679e50fSJeff Layton
645a679e50fSJeff Layton ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
646a679e50fSJeff Layton
647a679e50fSJeff Layton return ext->off + ext->len - op->extent.offset;
648a679e50fSJeff Layton }
649a679e50fSJeff Layton
650a679e50fSJeff Layton #endif
651