1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */ 23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H 33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H 43d14c5d2SYehuda Sadeh 5a02a946dSIlya Dryomov #include <linux/bitrev.h> 63d14c5d2SYehuda Sadeh #include <linux/completion.h> 73d14c5d2SYehuda Sadeh #include <linux/kref.h> 83d14c5d2SYehuda Sadeh #include <linux/mempool.h> 93d14c5d2SYehuda Sadeh #include <linux/rbtree.h> 1002113a0fSElena Reshetova #include <linux/refcount.h> 1197e27aaaSXiubo Li #include <linux/ktime.h> 123d14c5d2SYehuda Sadeh 136c4a1915SAlex Elder #include <linux/ceph/types.h> 146c4a1915SAlex Elder #include <linux/ceph/osdmap.h> 156c4a1915SAlex Elder #include <linux/ceph/messenger.h> 16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h> 176c4a1915SAlex Elder #include <linux/ceph/auth.h> 18c885837fSAlex Elder #include <linux/ceph/pagelist.h> 193d14c5d2SYehuda Sadeh 203d14c5d2SYehuda Sadeh struct ceph_msg; 213d14c5d2SYehuda Sadeh struct ceph_snap_context; 223d14c5d2SYehuda Sadeh struct ceph_osd_request; 233d14c5d2SYehuda Sadeh struct ceph_osd_client; 243d14c5d2SYehuda Sadeh 253d14c5d2SYehuda Sadeh /* 263d14c5d2SYehuda Sadeh * completion callback for async writepages 273d14c5d2SYehuda Sadeh */ 2885e084feSIlya Dryomov typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); 293d14c5d2SYehuda Sadeh 3063244fa1SIlya Dryomov #define CEPH_HOMELESS_OSD -1 3163244fa1SIlya Dryomov 3208b8a044SJeff Layton /* 33a679e50fSJeff Layton * A single extent in a SPARSE_READ reply. 34a679e50fSJeff Layton * 35a679e50fSJeff Layton * Note that these come from the OSD as little-endian values. On BE arches, 36a679e50fSJeff Layton * we convert them in-place after receipt. 37a679e50fSJeff Layton */ 38a679e50fSJeff Layton struct ceph_sparse_extent { 39a679e50fSJeff Layton u64 off; 40a679e50fSJeff Layton u64 len; 41a679e50fSJeff Layton } __packed; 42a679e50fSJeff Layton 43*f628d799SJeff Layton /* Sparse read state machine state values */ 44*f628d799SJeff Layton enum ceph_sparse_read_state { 45*f628d799SJeff Layton CEPH_SPARSE_READ_HDR = 0, 46*f628d799SJeff Layton CEPH_SPARSE_READ_EXTENTS, 47*f628d799SJeff Layton CEPH_SPARSE_READ_DATA_LEN, 48*f628d799SJeff Layton CEPH_SPARSE_READ_DATA, 49*f628d799SJeff Layton }; 50*f628d799SJeff Layton 51*f628d799SJeff Layton /* 52*f628d799SJeff Layton * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of 53*f628d799SJeff Layton * 64-bit offset/length pairs, and then all of the actual file data 54*f628d799SJeff Layton * concatenated after it (sans holes). 55*f628d799SJeff Layton * 56*f628d799SJeff Layton * Unfortunately, we don't know how long the extent array is until we've 57*f628d799SJeff Layton * started reading the data section of the reply. The caller should send down 58*f628d799SJeff Layton * a destination buffer for the array, but we'll alloc one if it's too small 59*f628d799SJeff Layton * or if the caller doesn't. 60*f628d799SJeff Layton */ 61*f628d799SJeff Layton struct ceph_sparse_read { 62*f628d799SJeff Layton enum ceph_sparse_read_state sr_state; /* state machine state */ 63*f628d799SJeff Layton u64 sr_req_off; /* orig request offset */ 64*f628d799SJeff Layton u64 sr_req_len; /* orig request length */ 65*f628d799SJeff Layton u64 sr_pos; /* current pos in buffer */ 66*f628d799SJeff Layton int sr_index; /* current extent index */ 67*f628d799SJeff Layton __le32 sr_datalen; /* length of actual data */ 68*f628d799SJeff Layton u32 sr_count; /* extent count in reply */ 69*f628d799SJeff Layton int sr_ext_len; /* length of extent array */ 70*f628d799SJeff Layton struct ceph_sparse_extent *sr_extent; /* extent array */ 71*f628d799SJeff Layton }; 72*f628d799SJeff Layton 73a679e50fSJeff Layton /* 7408b8a044SJeff Layton * A given osd we're communicating with. 7508b8a044SJeff Layton * 7608b8a044SJeff Layton * Note that the o_requests tree can be searched while holding the "lock" mutex 7708b8a044SJeff Layton * or the "o_requests_lock" spinlock. Insertion or removal requires both! 7808b8a044SJeff Layton */ 793d14c5d2SYehuda Sadeh struct ceph_osd { 8002113a0fSElena Reshetova refcount_t o_ref; 81*f628d799SJeff Layton int o_sparse_op_idx; 823d14c5d2SYehuda Sadeh struct ceph_osd_client *o_osdc; 833d14c5d2SYehuda Sadeh int o_osd; 843d14c5d2SYehuda Sadeh int o_incarnation; 853d14c5d2SYehuda Sadeh struct rb_node o_node; 863d14c5d2SYehuda Sadeh struct ceph_connection o_con; 8708b8a044SJeff Layton spinlock_t o_requests_lock; 885aea3dcdSIlya Dryomov struct rb_root o_requests; 89922dab61SIlya Dryomov struct rb_root o_linger_requests; 90a02a946dSIlya Dryomov struct rb_root o_backoff_mappings; 91a02a946dSIlya Dryomov struct rb_root o_backoffs_by_id; 923d14c5d2SYehuda Sadeh struct list_head o_osd_lru; 936c4a1915SAlex Elder struct ceph_auth_handshake o_auth; 943d14c5d2SYehuda Sadeh unsigned long lru_ttl; 953d14c5d2SYehuda Sadeh struct list_head o_keepalive_item; 965aea3dcdSIlya Dryomov struct mutex lock; 97*f628d799SJeff Layton struct ceph_sparse_read o_sparse_read; 983d14c5d2SYehuda Sadeh }; 993d14c5d2SYehuda Sadeh 1003f1af42aSIlya Dryomov #define CEPH_OSD_SLAB_OPS 2 1013f1af42aSIlya Dryomov #define CEPH_OSD_MAX_OPS 16 1021b83bef2SSage Weil 1032ac2b7a6SAlex Elder enum ceph_osd_data_type { 104ec9123c5SAlex Elder CEPH_OSD_DATA_TYPE_NONE = 0, 1052ac2b7a6SAlex Elder CEPH_OSD_DATA_TYPE_PAGES, 1069a5e6d09SAlex Elder CEPH_OSD_DATA_TYPE_PAGELIST, 1072ac2b7a6SAlex Elder #ifdef CONFIG_BLOCK 1082ac2b7a6SAlex Elder CEPH_OSD_DATA_TYPE_BIO, 1092ac2b7a6SAlex Elder #endif /* CONFIG_BLOCK */ 110b9e281c2SIlya Dryomov CEPH_OSD_DATA_TYPE_BVECS, 1112ac2b7a6SAlex Elder }; 1122ac2b7a6SAlex Elder 1132794a82aSAlex Elder struct ceph_osd_data { 1142ac2b7a6SAlex Elder enum ceph_osd_data_type type; 1152ac2b7a6SAlex Elder union { 1162794a82aSAlex Elder struct { 1172794a82aSAlex Elder struct page **pages; 118e0c59487SAlex Elder u64 length; 1192794a82aSAlex Elder u32 alignment; 1202794a82aSAlex Elder bool pages_from_pool; 1212794a82aSAlex Elder bool own_pages; 1222794a82aSAlex Elder }; 1239a5e6d09SAlex Elder struct ceph_pagelist *pagelist; 1242794a82aSAlex Elder #ifdef CONFIG_BLOCK 125fdce58ccSAlex Elder struct { 1265359a17dSIlya Dryomov struct ceph_bio_iter bio_pos; 1275359a17dSIlya Dryomov u32 bio_length; 128fdce58ccSAlex Elder }; 1292794a82aSAlex Elder #endif /* CONFIG_BLOCK */ 1300010f705SIlya Dryomov struct { 131b9e281c2SIlya Dryomov struct ceph_bvec_iter bvec_pos; 1320010f705SIlya Dryomov u32 num_bvecs; 1330010f705SIlya Dryomov }; 1342794a82aSAlex Elder }; 1352794a82aSAlex Elder }; 1362794a82aSAlex Elder 13779528734SAlex Elder struct ceph_osd_req_op { 13879528734SAlex Elder u16 op; /* CEPH_OSD_OP_* */ 1397b25bf5fSIlya Dryomov u32 flags; /* CEPH_OSD_OP_FLAG_* */ 140de2aa102SIlya Dryomov u32 indata_len; /* request */ 1417665d85bSYan, Zheng u32 outdata_len; /* reply */ 1427665d85bSYan, Zheng s32 rval; 1437665d85bSYan, Zheng 14479528734SAlex Elder union { 14549719778SAlex Elder struct ceph_osd_data raw_data_in; 14679528734SAlex Elder struct { 14779528734SAlex Elder u64 offset, length; 14879528734SAlex Elder u64 truncate_size; 14979528734SAlex Elder u32 truncate_seq; 150a679e50fSJeff Layton int sparse_ext_cnt; 151a679e50fSJeff Layton struct ceph_sparse_extent *sparse_ext; 1525476492fSAlex Elder struct ceph_osd_data osd_data; 15379528734SAlex Elder } extent; 15479528734SAlex Elder struct { 155d7d5a007SIlya Dryomov u32 name_len; 156d7d5a007SIlya Dryomov u32 value_len; 157d74b50beSYan, Zheng __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 158d74b50beSYan, Zheng __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 159d74b50beSYan, Zheng struct ceph_osd_data osd_data; 160d74b50beSYan, Zheng } xattr; 161d74b50beSYan, Zheng struct { 16279528734SAlex Elder const char *class_name; 16379528734SAlex Elder const char *method_name; 1645476492fSAlex Elder struct ceph_osd_data request_info; 16504017e29SAlex Elder struct ceph_osd_data request_data; 1665476492fSAlex Elder struct ceph_osd_data response_data; 16779528734SAlex Elder __u8 class_len; 16879528734SAlex Elder __u8 method_len; 169bb873b53SIlya Dryomov u32 indata_len; 17079528734SAlex Elder } cls; 17179528734SAlex Elder struct { 17279528734SAlex Elder u64 cookie; 173922dab61SIlya Dryomov __u8 op; /* CEPH_OSD_WATCH_OP_ */ 174922dab61SIlya Dryomov u32 gen; 17579528734SAlex Elder } watch; 176c647b8a8SIlya Dryomov struct { 177922dab61SIlya Dryomov struct ceph_osd_data request_data; 178922dab61SIlya Dryomov } notify_ack; 179922dab61SIlya Dryomov struct { 18019079203SIlya Dryomov u64 cookie; 18119079203SIlya Dryomov struct ceph_osd_data request_data; 18219079203SIlya Dryomov struct ceph_osd_data response_data; 18319079203SIlya Dryomov } notify; 18419079203SIlya Dryomov struct { 185a4ed38d7SDouglas Fuller struct ceph_osd_data response_data; 186a4ed38d7SDouglas Fuller } list_watchers; 187a4ed38d7SDouglas Fuller struct { 188c647b8a8SIlya Dryomov u64 expected_object_size; 189c647b8a8SIlya Dryomov u64 expected_write_size; 190d3798accSIlya Dryomov u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */ 191c647b8a8SIlya Dryomov } alloc_hint; 19223ddf9beSLuis Henriques struct { 19323ddf9beSLuis Henriques u64 snapid; 19423ddf9beSLuis Henriques u64 src_version; 19523ddf9beSLuis Henriques u8 flags; 19623ddf9beSLuis Henriques u32 src_fadvise_flags; 19723ddf9beSLuis Henriques struct ceph_osd_data osd_data; 19823ddf9beSLuis Henriques } copy_from; 19979528734SAlex Elder }; 20079528734SAlex Elder }; 20179528734SAlex Elder 20263244fa1SIlya Dryomov struct ceph_osd_request_target { 20363244fa1SIlya Dryomov struct ceph_object_id base_oid; 20463244fa1SIlya Dryomov struct ceph_object_locator base_oloc; 20563244fa1SIlya Dryomov struct ceph_object_id target_oid; 20663244fa1SIlya Dryomov struct ceph_object_locator target_oloc; 20763244fa1SIlya Dryomov 208dc98ff72SIlya Dryomov struct ceph_pg pgid; /* last raw pg we mapped to */ 209dc98ff72SIlya Dryomov struct ceph_spg spgid; /* last actual spg we mapped to */ 21063244fa1SIlya Dryomov u32 pg_num; 21163244fa1SIlya Dryomov u32 pg_num_mask; 21263244fa1SIlya Dryomov struct ceph_osds acting; 21363244fa1SIlya Dryomov struct ceph_osds up; 21463244fa1SIlya Dryomov int size; 21563244fa1SIlya Dryomov int min_size; 21663244fa1SIlya Dryomov bool sort_bitwise; 217ae78dd81SIlya Dryomov bool recovery_deletes; 21863244fa1SIlya Dryomov 21963244fa1SIlya Dryomov unsigned int flags; /* CEPH_OSD_FLAG_* */ 220117d96a0SIlya Dryomov bool used_replica; 22163244fa1SIlya Dryomov bool paused; 22263244fa1SIlya Dryomov 22304c7d789SIlya Dryomov u32 epoch; 224dc93e0e2SIlya Dryomov u32 last_force_resend; 225dc93e0e2SIlya Dryomov 22663244fa1SIlya Dryomov int osd; 22763244fa1SIlya Dryomov }; 22863244fa1SIlya Dryomov 2293d14c5d2SYehuda Sadeh /* an in-flight request */ 2303d14c5d2SYehuda Sadeh struct ceph_osd_request { 2313d14c5d2SYehuda Sadeh u64 r_tid; /* unique for this client */ 2323d14c5d2SYehuda Sadeh struct rb_node r_node; 2334609245eSIlya Dryomov struct rb_node r_mc_node; /* map check */ 23488bc1922SIlya Dryomov struct work_struct r_complete_work; 2353d14c5d2SYehuda Sadeh struct ceph_osd *r_osd; 236a66dd383SIlya Dryomov 237a66dd383SIlya Dryomov struct ceph_osd_request_target r_t; 238a66dd383SIlya Dryomov #define r_base_oid r_t.base_oid 239a66dd383SIlya Dryomov #define r_base_oloc r_t.base_oloc 240a66dd383SIlya Dryomov #define r_flags r_t.flags 2413d14c5d2SYehuda Sadeh 2423d14c5d2SYehuda Sadeh struct ceph_msg *r_request, *r_reply; 2433d14c5d2SYehuda Sadeh u32 r_sent; /* >0 if r_request is sending/sent */ 2441b83bef2SSage Weil 24579528734SAlex Elder /* request osd ops array */ 24679528734SAlex Elder unsigned int r_num_ops; 24779528734SAlex Elder 2481b83bef2SSage Weil int r_result; 2493d14c5d2SYehuda Sadeh 2503d14c5d2SYehuda Sadeh struct ceph_osd_client *r_osdc; 2513d14c5d2SYehuda Sadeh struct kref r_kref; 2523d14c5d2SYehuda Sadeh bool r_mempool; 253b18b9550SIlya Dryomov struct completion r_completion; /* private to osd_client.c */ 25426be8808SAlex Elder ceph_osdc_callback_t r_callback; 2553d14c5d2SYehuda Sadeh 2563d14c5d2SYehuda Sadeh struct inode *r_inode; /* for use by callbacks */ 25794e85771SIlya Dryomov struct list_head r_private_item; /* ditto */ 2583d14c5d2SYehuda Sadeh void *r_priv; /* ditto */ 2593d14c5d2SYehuda Sadeh 260bb873b53SIlya Dryomov /* set by submitter */ 261bb873b53SIlya Dryomov u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ 262bb873b53SIlya Dryomov struct ceph_snap_context *r_snapc; /* for writes */ 263fac02ddfSArnd Bergmann struct timespec64 r_mtime; /* ditto */ 264bb873b53SIlya Dryomov u64 r_data_offset; /* ditto */ 265922dab61SIlya Dryomov bool r_linger; /* don't resend on failure */ 2663d14c5d2SYehuda Sadeh 267bb873b53SIlya Dryomov /* internal */ 268bb873b53SIlya Dryomov unsigned long r_stamp; /* jiffies, send or check time */ 2697cc5e38fSIlya Dryomov unsigned long r_start_stamp; /* jiffies */ 27097e27aaaSXiubo Li ktime_t r_start_latency; /* ktime_t */ 27197e27aaaSXiubo Li ktime_t r_end_latency; /* ktime_t */ 272bb873b53SIlya Dryomov int r_attempts; 2734609245eSIlya Dryomov u32 r_map_dne_bound; 2743f1af42aSIlya Dryomov 2753f1af42aSIlya Dryomov struct ceph_osd_req_op r_ops[]; 2763d14c5d2SYehuda Sadeh }; 2773d14c5d2SYehuda Sadeh 278205ee118SIlya Dryomov struct ceph_request_redirect { 279205ee118SIlya Dryomov struct ceph_object_locator oloc; 280205ee118SIlya Dryomov }; 281205ee118SIlya Dryomov 2828cb441c0SIlya Dryomov /* 2838cb441c0SIlya Dryomov * osd request identifier 2848cb441c0SIlya Dryomov * 2858cb441c0SIlya Dryomov * caller name + incarnation# + tid to unique identify this request 2868cb441c0SIlya Dryomov */ 2878cb441c0SIlya Dryomov struct ceph_osd_reqid { 2888cb441c0SIlya Dryomov struct ceph_entity_name name; 2898cb441c0SIlya Dryomov __le64 tid; 2908cb441c0SIlya Dryomov __le32 inc; 2918cb441c0SIlya Dryomov } __packed; 2928cb441c0SIlya Dryomov 2938cb441c0SIlya Dryomov struct ceph_blkin_trace_info { 2948cb441c0SIlya Dryomov __le64 trace_id; 2958cb441c0SIlya Dryomov __le64 span_id; 2968cb441c0SIlya Dryomov __le64 parent_span_id; 2978cb441c0SIlya Dryomov } __packed; 2988cb441c0SIlya Dryomov 299922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, 300922dab61SIlya Dryomov u64 notifier_id, void *data, size_t data_len); 301922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); 302a40c4f10SYehuda Sadeh 303922dab61SIlya Dryomov struct ceph_osd_linger_request { 304922dab61SIlya Dryomov struct ceph_osd_client *osdc; 305922dab61SIlya Dryomov u64 linger_id; 306922dab61SIlya Dryomov bool committed; 30719079203SIlya Dryomov bool is_watch; /* watch or notify */ 308922dab61SIlya Dryomov 309922dab61SIlya Dryomov struct ceph_osd *osd; 310922dab61SIlya Dryomov struct ceph_osd_request *reg_req; 311922dab61SIlya Dryomov struct ceph_osd_request *ping_req; 312922dab61SIlya Dryomov unsigned long ping_sent; 313b07d3c4bSIlya Dryomov unsigned long watch_valid_thru; 314b07d3c4bSIlya Dryomov struct list_head pending_lworks; 315922dab61SIlya Dryomov 316922dab61SIlya Dryomov struct ceph_osd_request_target t; 3174609245eSIlya Dryomov u32 map_dne_bound; 318922dab61SIlya Dryomov 319fac02ddfSArnd Bergmann struct timespec64 mtime; 320922dab61SIlya Dryomov 321922dab61SIlya Dryomov struct kref kref; 322922dab61SIlya Dryomov struct mutex lock; 323922dab61SIlya Dryomov struct rb_node node; /* osd */ 324922dab61SIlya Dryomov struct rb_node osdc_node; /* osdc */ 3254609245eSIlya Dryomov struct rb_node mc_node; /* map check */ 326922dab61SIlya Dryomov struct list_head scan_item; 327922dab61SIlya Dryomov 328922dab61SIlya Dryomov struct completion reg_commit_wait; 32919079203SIlya Dryomov struct completion notify_finish_wait; 330922dab61SIlya Dryomov int reg_commit_error; 33119079203SIlya Dryomov int notify_finish_error; 332922dab61SIlya Dryomov int last_error; 333922dab61SIlya Dryomov 334922dab61SIlya Dryomov u32 register_gen; 33519079203SIlya Dryomov u64 notify_id; 336922dab61SIlya Dryomov 337922dab61SIlya Dryomov rados_watchcb2_t wcb; 338922dab61SIlya Dryomov rados_watcherrcb_t errcb; 339922dab61SIlya Dryomov void *data; 34019079203SIlya Dryomov 34175dbb685SIlya Dryomov struct ceph_pagelist *request_pl; 34275dbb685SIlya Dryomov struct page **notify_id_pages; 34375dbb685SIlya Dryomov 34419079203SIlya Dryomov struct page ***preply_pages; 34519079203SIlya Dryomov size_t *preply_len; 346a40c4f10SYehuda Sadeh }; 347a40c4f10SYehuda Sadeh 348a4ed38d7SDouglas Fuller struct ceph_watch_item { 349a4ed38d7SDouglas Fuller struct ceph_entity_name name; 350a4ed38d7SDouglas Fuller u64 cookie; 351a4ed38d7SDouglas Fuller struct ceph_entity_addr addr; 352a4ed38d7SDouglas Fuller }; 353a4ed38d7SDouglas Fuller 354a02a946dSIlya Dryomov struct ceph_spg_mapping { 355a02a946dSIlya Dryomov struct rb_node node; 356a02a946dSIlya Dryomov struct ceph_spg spgid; 357a02a946dSIlya Dryomov 358a02a946dSIlya Dryomov struct rb_root backoffs; 359a02a946dSIlya Dryomov }; 360a02a946dSIlya Dryomov 361a02a946dSIlya Dryomov struct ceph_hobject_id { 362a02a946dSIlya Dryomov void *key; 363a02a946dSIlya Dryomov size_t key_len; 364a02a946dSIlya Dryomov void *oid; 365a02a946dSIlya Dryomov size_t oid_len; 366a02a946dSIlya Dryomov u64 snapid; 367a02a946dSIlya Dryomov u32 hash; 368a02a946dSIlya Dryomov u8 is_max; 369a02a946dSIlya Dryomov void *nspace; 370a02a946dSIlya Dryomov size_t nspace_len; 371a02a946dSIlya Dryomov s64 pool; 372a02a946dSIlya Dryomov 373a02a946dSIlya Dryomov /* cache */ 374a02a946dSIlya Dryomov u32 hash_reverse_bits; 375a02a946dSIlya Dryomov }; 376a02a946dSIlya Dryomov 377a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) 378a02a946dSIlya Dryomov { 379a02a946dSIlya Dryomov hoid->hash_reverse_bits = bitrev32(hoid->hash); 380a02a946dSIlya Dryomov } 381a02a946dSIlya Dryomov 382a02a946dSIlya Dryomov /* 383a02a946dSIlya Dryomov * PG-wide backoff: [begin, end) 384a02a946dSIlya Dryomov * per-object backoff: begin == end 385a02a946dSIlya Dryomov */ 386a02a946dSIlya Dryomov struct ceph_osd_backoff { 387a02a946dSIlya Dryomov struct rb_node spg_node; 388a02a946dSIlya Dryomov struct rb_node id_node; 389a02a946dSIlya Dryomov 390a02a946dSIlya Dryomov struct ceph_spg spgid; 391a02a946dSIlya Dryomov u64 id; 392a02a946dSIlya Dryomov struct ceph_hobject_id *begin; 393a02a946dSIlya Dryomov struct ceph_hobject_id *end; 394a02a946dSIlya Dryomov }; 395a02a946dSIlya Dryomov 396264048afSIlya Dryomov #define CEPH_LINGER_ID_START 0xffff000000000000ULL 397264048afSIlya Dryomov 3983d14c5d2SYehuda Sadeh struct ceph_osd_client { 3993d14c5d2SYehuda Sadeh struct ceph_client *client; 4003d14c5d2SYehuda Sadeh 4013d14c5d2SYehuda Sadeh struct ceph_osdmap *osdmap; /* current map */ 4025aea3dcdSIlya Dryomov struct rw_semaphore lock; 4033d14c5d2SYehuda Sadeh 4043d14c5d2SYehuda Sadeh struct rb_root osds; /* osds */ 4053d14c5d2SYehuda Sadeh struct list_head osd_lru; /* idle osds */ 4069dd2845cSIlya Dryomov spinlock_t osd_lru_lock; 40758eb7932SJeff Layton u32 epoch_barrier; 4085aea3dcdSIlya Dryomov struct ceph_osd homeless_osd; 4095aea3dcdSIlya Dryomov atomic64_t last_tid; /* tid of last request */ 410922dab61SIlya Dryomov u64 last_linger_id; 411922dab61SIlya Dryomov struct rb_root linger_requests; /* lingering requests */ 4124609245eSIlya Dryomov struct rb_root map_checks; 4134609245eSIlya Dryomov struct rb_root linger_map_checks; 4145aea3dcdSIlya Dryomov atomic_t num_requests; 4155aea3dcdSIlya Dryomov atomic_t num_homeless; 41666850df5SIlya Dryomov int abort_err; 4173d14c5d2SYehuda Sadeh struct delayed_work timeout_work; 4183d14c5d2SYehuda Sadeh struct delayed_work osds_timeout_work; 4193d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS 4203d14c5d2SYehuda Sadeh struct dentry *debugfs_file; 4213d14c5d2SYehuda Sadeh #endif 4223d14c5d2SYehuda Sadeh 4233d14c5d2SYehuda Sadeh mempool_t *req_mempool; 4243d14c5d2SYehuda Sadeh 4253d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op; 4263d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op_reply; 427a40c4f10SYehuda Sadeh 428a40c4f10SYehuda Sadeh struct workqueue_struct *notify_wq; 42988bc1922SIlya Dryomov struct workqueue_struct *completion_wq; 4303d14c5d2SYehuda Sadeh }; 4313d14c5d2SYehuda Sadeh 432b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) 433b7ec35b3SIlya Dryomov { 434b7ec35b3SIlya Dryomov return osdc->osdmap->flags & flag; 435b7ec35b3SIlya Dryomov } 436b7ec35b3SIlya Dryomov 4375522ae0bSAlex Elder extern int ceph_osdc_setup(void); 4385522ae0bSAlex Elder extern void ceph_osdc_cleanup(void); 4395522ae0bSAlex Elder 4403d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc, 4413d14c5d2SYehuda Sadeh struct ceph_client *client); 4423d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc); 443120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); 4443d14c5d2SYehuda Sadeh 4453d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, 4463d14c5d2SYehuda Sadeh struct ceph_msg *msg); 4473d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 4483d14c5d2SYehuda Sadeh struct ceph_msg *msg); 44958eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); 45066850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); 4512cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); 4523d14c5d2SYehuda Sadeh 4534cf3e6dfSIlya Dryomov #define osd_req_op_data(oreq, whch, typ, fld) \ 4544cf3e6dfSIlya Dryomov ({ \ 4554cf3e6dfSIlya Dryomov struct ceph_osd_request *__oreq = (oreq); \ 4564cf3e6dfSIlya Dryomov unsigned int __whch = (whch); \ 4574cf3e6dfSIlya Dryomov BUG_ON(__whch >= __oreq->r_num_ops); \ 4584cf3e6dfSIlya Dryomov &__oreq->r_ops[__whch].typ.fld; \ 4594cf3e6dfSIlya Dryomov }) 4604cf3e6dfSIlya Dryomov 461042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, 462144cba14SYan, Zheng unsigned int which, u16 opcode, u32 flags); 46349719778SAlex Elder 46449719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, 46549719778SAlex Elder unsigned int which, 46649719778SAlex Elder struct page **pages, u64 length, 46749719778SAlex Elder u32 alignment, bool pages_from_pool, 46849719778SAlex Elder bool own_pages); 46949719778SAlex Elder 470c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, 471c99d2d4aSAlex Elder unsigned int which, u16 opcode, 47233803f33SAlex Elder u64 offset, u64 length, 47333803f33SAlex Elder u64 truncate_size, u32 truncate_seq); 474c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, 475c99d2d4aSAlex Elder unsigned int which, u64 length); 4762c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, 4772c63f49aSYan, Zheng unsigned int which, u64 offset_inc); 478a4ce40a9SAlex Elder 479a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data( 480a4ce40a9SAlex Elder struct ceph_osd_request *osd_req, 481406e2c9fSAlex Elder unsigned int which); 482a4ce40a9SAlex Elder 483a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, 484406e2c9fSAlex Elder unsigned int which, 485a4ce40a9SAlex Elder struct page **pages, u64 length, 486a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool, 487a4ce40a9SAlex Elder bool own_pages); 488a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, 489406e2c9fSAlex Elder unsigned int which, 490a4ce40a9SAlex Elder struct ceph_pagelist *pagelist); 491a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK 4925359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 493406e2c9fSAlex Elder unsigned int which, 4945359a17dSIlya Dryomov struct ceph_bio_iter *bio_pos, 4955359a17dSIlya Dryomov u32 bio_length); 496a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */ 4970010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, 4980010f705SIlya Dryomov unsigned int which, 4990010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs, 5000010f705SIlya Dryomov u32 bytes); 501b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, 502b9e281c2SIlya Dryomov unsigned int which, 503b9e281c2SIlya Dryomov struct ceph_bvec_iter *bvec_pos); 504a4ce40a9SAlex Elder 50504017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, 50604017e29SAlex Elder unsigned int which, 50704017e29SAlex Elder struct ceph_pagelist *pagelist); 5086c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, 5096c57b554SAlex Elder unsigned int which, 5106c57b554SAlex Elder struct page **pages, u64 length, 5116c57b554SAlex Elder u32 alignment, bool pages_from_pool, 5126c57b554SAlex Elder bool own_pages); 513b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, 514b9e281c2SIlya Dryomov unsigned int which, 5150010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs, 5160010f705SIlya Dryomov u32 bytes); 517a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, 518c99d2d4aSAlex Elder unsigned int which, 519a4ce40a9SAlex Elder struct page **pages, u64 length, 520a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool, 521a4ce40a9SAlex Elder bool own_pages); 52224639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 52304017e29SAlex Elder const char *class, const char *method); 524d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 525d74b50beSYan, Zheng u16 opcode, const char *name, const void *value, 526d74b50beSYan, Zheng size_t size, u8 cmp_op, u8 cmp_mode); 527c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 528c647b8a8SIlya Dryomov unsigned int which, 529c647b8a8SIlya Dryomov u64 expected_object_size, 530d3798accSIlya Dryomov u64 expected_write_size, 531d3798accSIlya Dryomov u32 flags); 532aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req, 533aca39d9eSLuís Henriques u64 src_snapid, u64 src_version, 534aca39d9eSLuís Henriques struct ceph_object_id *src_oid, 535aca39d9eSLuís Henriques struct ceph_object_locator *src_oloc, 536aca39d9eSLuís Henriques u32 src_fadvise_flags, 537aca39d9eSLuís Henriques u32 dst_fadvise_flags, 538aca39d9eSLuís Henriques u32 truncate_seq, u64 truncate_size, 539aca39d9eSLuís Henriques u8 copy_from_flags); 54033803f33SAlex Elder 5413d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 5423d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc, 543acead002SAlex Elder unsigned int num_ops, 5443d14c5d2SYehuda Sadeh bool use_mempool, 54554a54007SAlex Elder gfp_t gfp_flags); 54613d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); 5473d14c5d2SYehuda Sadeh 5483d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 5493d14c5d2SYehuda Sadeh struct ceph_file_layout *layout, 5503d14c5d2SYehuda Sadeh struct ceph_vino vino, 551acead002SAlex Elder u64 offset, u64 *len, 552715e4cd4SYan, Zheng unsigned int which, int num_ops, 553715e4cd4SYan, Zheng int opcode, int flags, 5543d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc, 555acead002SAlex Elder u32 truncate_seq, u64 truncate_size, 556153e5167SAlex Elder bool use_mempool); 5573d14c5d2SYehuda Sadeh 558a679e50fSJeff Layton int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); 559a679e50fSJeff Layton 560a679e50fSJeff Layton /* 561a679e50fSJeff Layton * How big an extent array should we preallocate for a sparse read? This is 562a679e50fSJeff Layton * just a starting value. If we get more than this back from the OSD, the 563a679e50fSJeff Layton * receiver will reallocate. 564a679e50fSJeff Layton */ 565a679e50fSJeff Layton #define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 566a679e50fSJeff Layton 567a679e50fSJeff Layton static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) 568a679e50fSJeff Layton { 569a679e50fSJeff Layton return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); 570a679e50fSJeff Layton } 571a679e50fSJeff Layton 5729e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req); 5739e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req); 5743d14c5d2SYehuda Sadeh 575a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc, 576a8af0d68SJeff Layton struct ceph_osd_request *req); 577c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); 5783d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 5793d14c5d2SYehuda Sadeh struct ceph_osd_request *req); 5803d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc); 5813d14c5d2SYehuda Sadeh 582dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); 5837cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); 584dd935f44SJosh Durgin 585428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc, 586428a7158SDouglas Fuller struct ceph_object_id *oid, 587428a7158SDouglas Fuller struct ceph_object_locator *oloc, 588428a7158SDouglas Fuller const char *class, const char *method, 589428a7158SDouglas Fuller unsigned int flags, 590428a7158SDouglas Fuller struct page *req_page, size_t req_len, 59168ada915SIlya Dryomov struct page **resp_pages, size_t *resp_len); 592428a7158SDouglas Fuller 593922dab61SIlya Dryomov /* watch/notify */ 594922dab61SIlya Dryomov struct ceph_osd_linger_request * 595922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc, 596922dab61SIlya Dryomov struct ceph_object_id *oid, 597922dab61SIlya Dryomov struct ceph_object_locator *oloc, 598922dab61SIlya Dryomov rados_watchcb2_t wcb, 599922dab61SIlya Dryomov rados_watcherrcb_t errcb, 600922dab61SIlya Dryomov void *data); 601922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc, 602922dab61SIlya Dryomov struct ceph_osd_linger_request *lreq); 603922dab61SIlya Dryomov 604922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, 605922dab61SIlya Dryomov struct ceph_object_id *oid, 606922dab61SIlya Dryomov struct ceph_object_locator *oloc, 607922dab61SIlya Dryomov u64 notify_id, 608922dab61SIlya Dryomov u64 cookie, 609922dab61SIlya Dryomov void *payload, 6106d54228fSIlya Dryomov u32 payload_len); 61119079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc, 61219079203SIlya Dryomov struct ceph_object_id *oid, 61319079203SIlya Dryomov struct ceph_object_locator *oloc, 61419079203SIlya Dryomov void *payload, 6156d54228fSIlya Dryomov u32 payload_len, 61619079203SIlya Dryomov u32 timeout, 61719079203SIlya Dryomov struct page ***preply_pages, 61819079203SIlya Dryomov size_t *preply_len); 619b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc, 620b07d3c4bSIlya Dryomov struct ceph_osd_linger_request *lreq); 621a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, 622a4ed38d7SDouglas Fuller struct ceph_object_id *oid, 623a4ed38d7SDouglas Fuller struct ceph_object_locator *oloc, 624a4ed38d7SDouglas Fuller struct ceph_watch_item **watchers, 625a4ed38d7SDouglas Fuller u32 *num_watchers); 6263d14c5d2SYehuda Sadeh 627a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */ 628a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) 629a679e50fSJeff Layton { 630a679e50fSJeff Layton struct ceph_sparse_extent *ext; 631a679e50fSJeff Layton 632a679e50fSJeff Layton /* No extents? No data */ 633a679e50fSJeff Layton if (op->extent.sparse_ext_cnt == 0) 634a679e50fSJeff Layton return 0; 635a679e50fSJeff Layton 636a679e50fSJeff Layton ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; 637a679e50fSJeff Layton 638a679e50fSJeff Layton return ext->off + ext->len - op->extent.offset; 639a679e50fSJeff Layton } 640a679e50fSJeff Layton 641a679e50fSJeff Layton #endif 642