1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */ 23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H 33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H 43d14c5d2SYehuda Sadeh 5a02a946dSIlya Dryomov #include <linux/bitrev.h> 63d14c5d2SYehuda Sadeh #include <linux/completion.h> 73d14c5d2SYehuda Sadeh #include <linux/kref.h> 83d14c5d2SYehuda Sadeh #include <linux/mempool.h> 93d14c5d2SYehuda Sadeh #include <linux/rbtree.h> 1002113a0fSElena Reshetova #include <linux/refcount.h> 1197e27aaaSXiubo Li #include <linux/ktime.h> 123d14c5d2SYehuda Sadeh 136c4a1915SAlex Elder #include <linux/ceph/types.h> 146c4a1915SAlex Elder #include <linux/ceph/osdmap.h> 156c4a1915SAlex Elder #include <linux/ceph/messenger.h> 16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h> 176c4a1915SAlex Elder #include <linux/ceph/auth.h> 18c885837fSAlex Elder #include <linux/ceph/pagelist.h> 193d14c5d2SYehuda Sadeh 203d14c5d2SYehuda Sadeh struct ceph_msg; 213d14c5d2SYehuda Sadeh struct ceph_snap_context; 223d14c5d2SYehuda Sadeh struct ceph_osd_request; 233d14c5d2SYehuda Sadeh struct ceph_osd_client; 243d14c5d2SYehuda Sadeh 253d14c5d2SYehuda Sadeh /* 263d14c5d2SYehuda Sadeh * completion callback for async writepages 273d14c5d2SYehuda Sadeh */ 2885e084feSIlya Dryomov typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); 293d14c5d2SYehuda Sadeh 3063244fa1SIlya Dryomov #define CEPH_HOMELESS_OSD -1 3163244fa1SIlya Dryomov 3208b8a044SJeff Layton /* 33*a679e50fSJeff Layton * A single extent in a SPARSE_READ reply. 34*a679e50fSJeff Layton * 35*a679e50fSJeff Layton * Note that these come from the OSD as little-endian values. On BE arches, 36*a679e50fSJeff Layton * we convert them in-place after receipt. 37*a679e50fSJeff Layton */ 38*a679e50fSJeff Layton struct ceph_sparse_extent { 39*a679e50fSJeff Layton u64 off; 40*a679e50fSJeff Layton u64 len; 41*a679e50fSJeff Layton } __packed; 42*a679e50fSJeff Layton 43*a679e50fSJeff Layton /* 4408b8a044SJeff Layton * A given osd we're communicating with. 4508b8a044SJeff Layton * 4608b8a044SJeff Layton * Note that the o_requests tree can be searched while holding the "lock" mutex 4708b8a044SJeff Layton * or the "o_requests_lock" spinlock. Insertion or removal requires both! 4808b8a044SJeff Layton */ 493d14c5d2SYehuda Sadeh struct ceph_osd { 5002113a0fSElena Reshetova refcount_t o_ref; 513d14c5d2SYehuda Sadeh struct ceph_osd_client *o_osdc; 523d14c5d2SYehuda Sadeh int o_osd; 533d14c5d2SYehuda Sadeh int o_incarnation; 543d14c5d2SYehuda Sadeh struct rb_node o_node; 553d14c5d2SYehuda Sadeh struct ceph_connection o_con; 5608b8a044SJeff Layton spinlock_t o_requests_lock; 575aea3dcdSIlya Dryomov struct rb_root o_requests; 58922dab61SIlya Dryomov struct rb_root o_linger_requests; 59a02a946dSIlya Dryomov struct rb_root o_backoff_mappings; 60a02a946dSIlya Dryomov struct rb_root o_backoffs_by_id; 613d14c5d2SYehuda Sadeh struct list_head o_osd_lru; 626c4a1915SAlex Elder struct ceph_auth_handshake o_auth; 633d14c5d2SYehuda Sadeh unsigned long lru_ttl; 643d14c5d2SYehuda Sadeh struct list_head o_keepalive_item; 655aea3dcdSIlya Dryomov struct mutex lock; 663d14c5d2SYehuda Sadeh }; 673d14c5d2SYehuda Sadeh 683f1af42aSIlya Dryomov #define CEPH_OSD_SLAB_OPS 2 693f1af42aSIlya Dryomov #define CEPH_OSD_MAX_OPS 16 701b83bef2SSage Weil 712ac2b7a6SAlex Elder enum ceph_osd_data_type { 72ec9123c5SAlex Elder CEPH_OSD_DATA_TYPE_NONE = 0, 732ac2b7a6SAlex Elder CEPH_OSD_DATA_TYPE_PAGES, 749a5e6d09SAlex Elder CEPH_OSD_DATA_TYPE_PAGELIST, 752ac2b7a6SAlex Elder #ifdef CONFIG_BLOCK 762ac2b7a6SAlex Elder CEPH_OSD_DATA_TYPE_BIO, 772ac2b7a6SAlex Elder #endif /* CONFIG_BLOCK */ 78b9e281c2SIlya Dryomov CEPH_OSD_DATA_TYPE_BVECS, 792ac2b7a6SAlex Elder }; 802ac2b7a6SAlex Elder 812794a82aSAlex Elder struct ceph_osd_data { 822ac2b7a6SAlex Elder enum ceph_osd_data_type type; 832ac2b7a6SAlex Elder union { 842794a82aSAlex Elder struct { 852794a82aSAlex Elder struct page **pages; 86e0c59487SAlex Elder u64 length; 872794a82aSAlex Elder u32 alignment; 882794a82aSAlex Elder bool pages_from_pool; 892794a82aSAlex Elder bool own_pages; 902794a82aSAlex Elder }; 919a5e6d09SAlex Elder struct ceph_pagelist *pagelist; 922794a82aSAlex Elder #ifdef CONFIG_BLOCK 93fdce58ccSAlex Elder struct { 945359a17dSIlya Dryomov struct ceph_bio_iter bio_pos; 955359a17dSIlya Dryomov u32 bio_length; 96fdce58ccSAlex Elder }; 972794a82aSAlex Elder #endif /* CONFIG_BLOCK */ 980010f705SIlya Dryomov struct { 99b9e281c2SIlya Dryomov struct ceph_bvec_iter bvec_pos; 1000010f705SIlya Dryomov u32 num_bvecs; 1010010f705SIlya Dryomov }; 1022794a82aSAlex Elder }; 1032794a82aSAlex Elder }; 1042794a82aSAlex Elder 10579528734SAlex Elder struct ceph_osd_req_op { 10679528734SAlex Elder u16 op; /* CEPH_OSD_OP_* */ 1077b25bf5fSIlya Dryomov u32 flags; /* CEPH_OSD_OP_FLAG_* */ 108de2aa102SIlya Dryomov u32 indata_len; /* request */ 1097665d85bSYan, Zheng u32 outdata_len; /* reply */ 1107665d85bSYan, Zheng s32 rval; 1117665d85bSYan, Zheng 11279528734SAlex Elder union { 11349719778SAlex Elder struct ceph_osd_data raw_data_in; 11479528734SAlex Elder struct { 11579528734SAlex Elder u64 offset, length; 11679528734SAlex Elder u64 truncate_size; 11779528734SAlex Elder u32 truncate_seq; 118*a679e50fSJeff Layton int sparse_ext_cnt; 119*a679e50fSJeff Layton struct ceph_sparse_extent *sparse_ext; 1205476492fSAlex Elder struct ceph_osd_data osd_data; 12179528734SAlex Elder } extent; 12279528734SAlex Elder struct { 123d7d5a007SIlya Dryomov u32 name_len; 124d7d5a007SIlya Dryomov u32 value_len; 125d74b50beSYan, Zheng __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 126d74b50beSYan, Zheng __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 127d74b50beSYan, Zheng struct ceph_osd_data osd_data; 128d74b50beSYan, Zheng } xattr; 129d74b50beSYan, Zheng struct { 13079528734SAlex Elder const char *class_name; 13179528734SAlex Elder const char *method_name; 1325476492fSAlex Elder struct ceph_osd_data request_info; 13304017e29SAlex Elder struct ceph_osd_data request_data; 1345476492fSAlex Elder struct ceph_osd_data response_data; 13579528734SAlex Elder __u8 class_len; 13679528734SAlex Elder __u8 method_len; 137bb873b53SIlya Dryomov u32 indata_len; 13879528734SAlex Elder } cls; 13979528734SAlex Elder struct { 14079528734SAlex Elder u64 cookie; 141922dab61SIlya Dryomov __u8 op; /* CEPH_OSD_WATCH_OP_ */ 142922dab61SIlya Dryomov u32 gen; 14379528734SAlex Elder } watch; 144c647b8a8SIlya Dryomov struct { 145922dab61SIlya Dryomov struct ceph_osd_data request_data; 146922dab61SIlya Dryomov } notify_ack; 147922dab61SIlya Dryomov struct { 14819079203SIlya Dryomov u64 cookie; 14919079203SIlya Dryomov struct ceph_osd_data request_data; 15019079203SIlya Dryomov struct ceph_osd_data response_data; 15119079203SIlya Dryomov } notify; 15219079203SIlya Dryomov struct { 153a4ed38d7SDouglas Fuller struct ceph_osd_data response_data; 154a4ed38d7SDouglas Fuller } list_watchers; 155a4ed38d7SDouglas Fuller struct { 156c647b8a8SIlya Dryomov u64 expected_object_size; 157c647b8a8SIlya Dryomov u64 expected_write_size; 158d3798accSIlya Dryomov u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */ 159c647b8a8SIlya Dryomov } alloc_hint; 16023ddf9beSLuis Henriques struct { 16123ddf9beSLuis Henriques u64 snapid; 16223ddf9beSLuis Henriques u64 src_version; 16323ddf9beSLuis Henriques u8 flags; 16423ddf9beSLuis Henriques u32 src_fadvise_flags; 16523ddf9beSLuis Henriques struct ceph_osd_data osd_data; 16623ddf9beSLuis Henriques } copy_from; 16779528734SAlex Elder }; 16879528734SAlex Elder }; 16979528734SAlex Elder 17063244fa1SIlya Dryomov struct ceph_osd_request_target { 17163244fa1SIlya Dryomov struct ceph_object_id base_oid; 17263244fa1SIlya Dryomov struct ceph_object_locator base_oloc; 17363244fa1SIlya Dryomov struct ceph_object_id target_oid; 17463244fa1SIlya Dryomov struct ceph_object_locator target_oloc; 17563244fa1SIlya Dryomov 176dc98ff72SIlya Dryomov struct ceph_pg pgid; /* last raw pg we mapped to */ 177dc98ff72SIlya Dryomov struct ceph_spg spgid; /* last actual spg we mapped to */ 17863244fa1SIlya Dryomov u32 pg_num; 17963244fa1SIlya Dryomov u32 pg_num_mask; 18063244fa1SIlya Dryomov struct ceph_osds acting; 18163244fa1SIlya Dryomov struct ceph_osds up; 18263244fa1SIlya Dryomov int size; 18363244fa1SIlya Dryomov int min_size; 18463244fa1SIlya Dryomov bool sort_bitwise; 185ae78dd81SIlya Dryomov bool recovery_deletes; 18663244fa1SIlya Dryomov 18763244fa1SIlya Dryomov unsigned int flags; /* CEPH_OSD_FLAG_* */ 188117d96a0SIlya Dryomov bool used_replica; 18963244fa1SIlya Dryomov bool paused; 19063244fa1SIlya Dryomov 19104c7d789SIlya Dryomov u32 epoch; 192dc93e0e2SIlya Dryomov u32 last_force_resend; 193dc93e0e2SIlya Dryomov 19463244fa1SIlya Dryomov int osd; 19563244fa1SIlya Dryomov }; 19663244fa1SIlya Dryomov 1973d14c5d2SYehuda Sadeh /* an in-flight request */ 1983d14c5d2SYehuda Sadeh struct ceph_osd_request { 1993d14c5d2SYehuda Sadeh u64 r_tid; /* unique for this client */ 2003d14c5d2SYehuda Sadeh struct rb_node r_node; 2014609245eSIlya Dryomov struct rb_node r_mc_node; /* map check */ 20288bc1922SIlya Dryomov struct work_struct r_complete_work; 2033d14c5d2SYehuda Sadeh struct ceph_osd *r_osd; 204a66dd383SIlya Dryomov 205a66dd383SIlya Dryomov struct ceph_osd_request_target r_t; 206a66dd383SIlya Dryomov #define r_base_oid r_t.base_oid 207a66dd383SIlya Dryomov #define r_base_oloc r_t.base_oloc 208a66dd383SIlya Dryomov #define r_flags r_t.flags 2093d14c5d2SYehuda Sadeh 2103d14c5d2SYehuda Sadeh struct ceph_msg *r_request, *r_reply; 2113d14c5d2SYehuda Sadeh u32 r_sent; /* >0 if r_request is sending/sent */ 2121b83bef2SSage Weil 21379528734SAlex Elder /* request osd ops array */ 21479528734SAlex Elder unsigned int r_num_ops; 21579528734SAlex Elder 2161b83bef2SSage Weil int r_result; 2173d14c5d2SYehuda Sadeh 2183d14c5d2SYehuda Sadeh struct ceph_osd_client *r_osdc; 2193d14c5d2SYehuda Sadeh struct kref r_kref; 2203d14c5d2SYehuda Sadeh bool r_mempool; 221b18b9550SIlya Dryomov struct completion r_completion; /* private to osd_client.c */ 22226be8808SAlex Elder ceph_osdc_callback_t r_callback; 2233d14c5d2SYehuda Sadeh 2243d14c5d2SYehuda Sadeh struct inode *r_inode; /* for use by callbacks */ 22594e85771SIlya Dryomov struct list_head r_private_item; /* ditto */ 2263d14c5d2SYehuda Sadeh void *r_priv; /* ditto */ 2273d14c5d2SYehuda Sadeh 228bb873b53SIlya Dryomov /* set by submitter */ 229bb873b53SIlya Dryomov u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ 230bb873b53SIlya Dryomov struct ceph_snap_context *r_snapc; /* for writes */ 231fac02ddfSArnd Bergmann struct timespec64 r_mtime; /* ditto */ 232bb873b53SIlya Dryomov u64 r_data_offset; /* ditto */ 233922dab61SIlya Dryomov bool r_linger; /* don't resend on failure */ 2343d14c5d2SYehuda Sadeh 235bb873b53SIlya Dryomov /* internal */ 236bb873b53SIlya Dryomov unsigned long r_stamp; /* jiffies, send or check time */ 2377cc5e38fSIlya Dryomov unsigned long r_start_stamp; /* jiffies */ 23897e27aaaSXiubo Li ktime_t r_start_latency; /* ktime_t */ 23997e27aaaSXiubo Li ktime_t r_end_latency; /* ktime_t */ 240bb873b53SIlya Dryomov int r_attempts; 2414609245eSIlya Dryomov u32 r_map_dne_bound; 2423f1af42aSIlya Dryomov 2433f1af42aSIlya Dryomov struct ceph_osd_req_op r_ops[]; 2443d14c5d2SYehuda Sadeh }; 2453d14c5d2SYehuda Sadeh 246205ee118SIlya Dryomov struct ceph_request_redirect { 247205ee118SIlya Dryomov struct ceph_object_locator oloc; 248205ee118SIlya Dryomov }; 249205ee118SIlya Dryomov 2508cb441c0SIlya Dryomov /* 2518cb441c0SIlya Dryomov * osd request identifier 2528cb441c0SIlya Dryomov * 2538cb441c0SIlya Dryomov * caller name + incarnation# + tid to unique identify this request 2548cb441c0SIlya Dryomov */ 2558cb441c0SIlya Dryomov struct ceph_osd_reqid { 2568cb441c0SIlya Dryomov struct ceph_entity_name name; 2578cb441c0SIlya Dryomov __le64 tid; 2588cb441c0SIlya Dryomov __le32 inc; 2598cb441c0SIlya Dryomov } __packed; 2608cb441c0SIlya Dryomov 2618cb441c0SIlya Dryomov struct ceph_blkin_trace_info { 2628cb441c0SIlya Dryomov __le64 trace_id; 2638cb441c0SIlya Dryomov __le64 span_id; 2648cb441c0SIlya Dryomov __le64 parent_span_id; 2658cb441c0SIlya Dryomov } __packed; 2668cb441c0SIlya Dryomov 267922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, 268922dab61SIlya Dryomov u64 notifier_id, void *data, size_t data_len); 269922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); 270a40c4f10SYehuda Sadeh 271922dab61SIlya Dryomov struct ceph_osd_linger_request { 272922dab61SIlya Dryomov struct ceph_osd_client *osdc; 273922dab61SIlya Dryomov u64 linger_id; 274922dab61SIlya Dryomov bool committed; 27519079203SIlya Dryomov bool is_watch; /* watch or notify */ 276922dab61SIlya Dryomov 277922dab61SIlya Dryomov struct ceph_osd *osd; 278922dab61SIlya Dryomov struct ceph_osd_request *reg_req; 279922dab61SIlya Dryomov struct ceph_osd_request *ping_req; 280922dab61SIlya Dryomov unsigned long ping_sent; 281b07d3c4bSIlya Dryomov unsigned long watch_valid_thru; 282b07d3c4bSIlya Dryomov struct list_head pending_lworks; 283922dab61SIlya Dryomov 284922dab61SIlya Dryomov struct ceph_osd_request_target t; 2854609245eSIlya Dryomov u32 map_dne_bound; 286922dab61SIlya Dryomov 287fac02ddfSArnd Bergmann struct timespec64 mtime; 288922dab61SIlya Dryomov 289922dab61SIlya Dryomov struct kref kref; 290922dab61SIlya Dryomov struct mutex lock; 291922dab61SIlya Dryomov struct rb_node node; /* osd */ 292922dab61SIlya Dryomov struct rb_node osdc_node; /* osdc */ 2934609245eSIlya Dryomov struct rb_node mc_node; /* map check */ 294922dab61SIlya Dryomov struct list_head scan_item; 295922dab61SIlya Dryomov 296922dab61SIlya Dryomov struct completion reg_commit_wait; 29719079203SIlya Dryomov struct completion notify_finish_wait; 298922dab61SIlya Dryomov int reg_commit_error; 29919079203SIlya Dryomov int notify_finish_error; 300922dab61SIlya Dryomov int last_error; 301922dab61SIlya Dryomov 302922dab61SIlya Dryomov u32 register_gen; 30319079203SIlya Dryomov u64 notify_id; 304922dab61SIlya Dryomov 305922dab61SIlya Dryomov rados_watchcb2_t wcb; 306922dab61SIlya Dryomov rados_watcherrcb_t errcb; 307922dab61SIlya Dryomov void *data; 30819079203SIlya Dryomov 30975dbb685SIlya Dryomov struct ceph_pagelist *request_pl; 31075dbb685SIlya Dryomov struct page **notify_id_pages; 31175dbb685SIlya Dryomov 31219079203SIlya Dryomov struct page ***preply_pages; 31319079203SIlya Dryomov size_t *preply_len; 314a40c4f10SYehuda Sadeh }; 315a40c4f10SYehuda Sadeh 316a4ed38d7SDouglas Fuller struct ceph_watch_item { 317a4ed38d7SDouglas Fuller struct ceph_entity_name name; 318a4ed38d7SDouglas Fuller u64 cookie; 319a4ed38d7SDouglas Fuller struct ceph_entity_addr addr; 320a4ed38d7SDouglas Fuller }; 321a4ed38d7SDouglas Fuller 322a02a946dSIlya Dryomov struct ceph_spg_mapping { 323a02a946dSIlya Dryomov struct rb_node node; 324a02a946dSIlya Dryomov struct ceph_spg spgid; 325a02a946dSIlya Dryomov 326a02a946dSIlya Dryomov struct rb_root backoffs; 327a02a946dSIlya Dryomov }; 328a02a946dSIlya Dryomov 329a02a946dSIlya Dryomov struct ceph_hobject_id { 330a02a946dSIlya Dryomov void *key; 331a02a946dSIlya Dryomov size_t key_len; 332a02a946dSIlya Dryomov void *oid; 333a02a946dSIlya Dryomov size_t oid_len; 334a02a946dSIlya Dryomov u64 snapid; 335a02a946dSIlya Dryomov u32 hash; 336a02a946dSIlya Dryomov u8 is_max; 337a02a946dSIlya Dryomov void *nspace; 338a02a946dSIlya Dryomov size_t nspace_len; 339a02a946dSIlya Dryomov s64 pool; 340a02a946dSIlya Dryomov 341a02a946dSIlya Dryomov /* cache */ 342a02a946dSIlya Dryomov u32 hash_reverse_bits; 343a02a946dSIlya Dryomov }; 344a02a946dSIlya Dryomov 345a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) 346a02a946dSIlya Dryomov { 347a02a946dSIlya Dryomov hoid->hash_reverse_bits = bitrev32(hoid->hash); 348a02a946dSIlya Dryomov } 349a02a946dSIlya Dryomov 350a02a946dSIlya Dryomov /* 351a02a946dSIlya Dryomov * PG-wide backoff: [begin, end) 352a02a946dSIlya Dryomov * per-object backoff: begin == end 353a02a946dSIlya Dryomov */ 354a02a946dSIlya Dryomov struct ceph_osd_backoff { 355a02a946dSIlya Dryomov struct rb_node spg_node; 356a02a946dSIlya Dryomov struct rb_node id_node; 357a02a946dSIlya Dryomov 358a02a946dSIlya Dryomov struct ceph_spg spgid; 359a02a946dSIlya Dryomov u64 id; 360a02a946dSIlya Dryomov struct ceph_hobject_id *begin; 361a02a946dSIlya Dryomov struct ceph_hobject_id *end; 362a02a946dSIlya Dryomov }; 363a02a946dSIlya Dryomov 364264048afSIlya Dryomov #define CEPH_LINGER_ID_START 0xffff000000000000ULL 365264048afSIlya Dryomov 3663d14c5d2SYehuda Sadeh struct ceph_osd_client { 3673d14c5d2SYehuda Sadeh struct ceph_client *client; 3683d14c5d2SYehuda Sadeh 3693d14c5d2SYehuda Sadeh struct ceph_osdmap *osdmap; /* current map */ 3705aea3dcdSIlya Dryomov struct rw_semaphore lock; 3713d14c5d2SYehuda Sadeh 3723d14c5d2SYehuda Sadeh struct rb_root osds; /* osds */ 3733d14c5d2SYehuda Sadeh struct list_head osd_lru; /* idle osds */ 3749dd2845cSIlya Dryomov spinlock_t osd_lru_lock; 37558eb7932SJeff Layton u32 epoch_barrier; 3765aea3dcdSIlya Dryomov struct ceph_osd homeless_osd; 3775aea3dcdSIlya Dryomov atomic64_t last_tid; /* tid of last request */ 378922dab61SIlya Dryomov u64 last_linger_id; 379922dab61SIlya Dryomov struct rb_root linger_requests; /* lingering requests */ 3804609245eSIlya Dryomov struct rb_root map_checks; 3814609245eSIlya Dryomov struct rb_root linger_map_checks; 3825aea3dcdSIlya Dryomov atomic_t num_requests; 3835aea3dcdSIlya Dryomov atomic_t num_homeless; 38466850df5SIlya Dryomov int abort_err; 3853d14c5d2SYehuda Sadeh struct delayed_work timeout_work; 3863d14c5d2SYehuda Sadeh struct delayed_work osds_timeout_work; 3873d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS 3883d14c5d2SYehuda Sadeh struct dentry *debugfs_file; 3893d14c5d2SYehuda Sadeh #endif 3903d14c5d2SYehuda Sadeh 3913d14c5d2SYehuda Sadeh mempool_t *req_mempool; 3923d14c5d2SYehuda Sadeh 3933d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op; 3943d14c5d2SYehuda Sadeh struct ceph_msgpool msgpool_op_reply; 395a40c4f10SYehuda Sadeh 396a40c4f10SYehuda Sadeh struct workqueue_struct *notify_wq; 39788bc1922SIlya Dryomov struct workqueue_struct *completion_wq; 3983d14c5d2SYehuda Sadeh }; 3993d14c5d2SYehuda Sadeh 400b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) 401b7ec35b3SIlya Dryomov { 402b7ec35b3SIlya Dryomov return osdc->osdmap->flags & flag; 403b7ec35b3SIlya Dryomov } 404b7ec35b3SIlya Dryomov 4055522ae0bSAlex Elder extern int ceph_osdc_setup(void); 4065522ae0bSAlex Elder extern void ceph_osdc_cleanup(void); 4075522ae0bSAlex Elder 4083d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc, 4093d14c5d2SYehuda Sadeh struct ceph_client *client); 4103d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc); 411120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); 4123d14c5d2SYehuda Sadeh 4133d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, 4143d14c5d2SYehuda Sadeh struct ceph_msg *msg); 4153d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 4163d14c5d2SYehuda Sadeh struct ceph_msg *msg); 41758eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); 41866850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); 4192cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); 4203d14c5d2SYehuda Sadeh 4214cf3e6dfSIlya Dryomov #define osd_req_op_data(oreq, whch, typ, fld) \ 4224cf3e6dfSIlya Dryomov ({ \ 4234cf3e6dfSIlya Dryomov struct ceph_osd_request *__oreq = (oreq); \ 4244cf3e6dfSIlya Dryomov unsigned int __whch = (whch); \ 4254cf3e6dfSIlya Dryomov BUG_ON(__whch >= __oreq->r_num_ops); \ 4264cf3e6dfSIlya Dryomov &__oreq->r_ops[__whch].typ.fld; \ 4274cf3e6dfSIlya Dryomov }) 4284cf3e6dfSIlya Dryomov 429042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, 430144cba14SYan, Zheng unsigned int which, u16 opcode, u32 flags); 43149719778SAlex Elder 43249719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, 43349719778SAlex Elder unsigned int which, 43449719778SAlex Elder struct page **pages, u64 length, 43549719778SAlex Elder u32 alignment, bool pages_from_pool, 43649719778SAlex Elder bool own_pages); 43749719778SAlex Elder 438c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, 439c99d2d4aSAlex Elder unsigned int which, u16 opcode, 44033803f33SAlex Elder u64 offset, u64 length, 44133803f33SAlex Elder u64 truncate_size, u32 truncate_seq); 442c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, 443c99d2d4aSAlex Elder unsigned int which, u64 length); 4442c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, 4452c63f49aSYan, Zheng unsigned int which, u64 offset_inc); 446a4ce40a9SAlex Elder 447a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data( 448a4ce40a9SAlex Elder struct ceph_osd_request *osd_req, 449406e2c9fSAlex Elder unsigned int which); 450a4ce40a9SAlex Elder 451a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, 452406e2c9fSAlex Elder unsigned int which, 453a4ce40a9SAlex Elder struct page **pages, u64 length, 454a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool, 455a4ce40a9SAlex Elder bool own_pages); 456a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, 457406e2c9fSAlex Elder unsigned int which, 458a4ce40a9SAlex Elder struct ceph_pagelist *pagelist); 459a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK 4605359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 461406e2c9fSAlex Elder unsigned int which, 4625359a17dSIlya Dryomov struct ceph_bio_iter *bio_pos, 4635359a17dSIlya Dryomov u32 bio_length); 464a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */ 4650010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, 4660010f705SIlya Dryomov unsigned int which, 4670010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs, 4680010f705SIlya Dryomov u32 bytes); 469b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, 470b9e281c2SIlya Dryomov unsigned int which, 471b9e281c2SIlya Dryomov struct ceph_bvec_iter *bvec_pos); 472a4ce40a9SAlex Elder 47304017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, 47404017e29SAlex Elder unsigned int which, 47504017e29SAlex Elder struct ceph_pagelist *pagelist); 4766c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, 4776c57b554SAlex Elder unsigned int which, 4786c57b554SAlex Elder struct page **pages, u64 length, 4796c57b554SAlex Elder u32 alignment, bool pages_from_pool, 4806c57b554SAlex Elder bool own_pages); 481b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, 482b9e281c2SIlya Dryomov unsigned int which, 4830010f705SIlya Dryomov struct bio_vec *bvecs, u32 num_bvecs, 4840010f705SIlya Dryomov u32 bytes); 485a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, 486c99d2d4aSAlex Elder unsigned int which, 487a4ce40a9SAlex Elder struct page **pages, u64 length, 488a4ce40a9SAlex Elder u32 alignment, bool pages_from_pool, 489a4ce40a9SAlex Elder bool own_pages); 49024639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 49104017e29SAlex Elder const char *class, const char *method); 492d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 493d74b50beSYan, Zheng u16 opcode, const char *name, const void *value, 494d74b50beSYan, Zheng size_t size, u8 cmp_op, u8 cmp_mode); 495c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 496c647b8a8SIlya Dryomov unsigned int which, 497c647b8a8SIlya Dryomov u64 expected_object_size, 498d3798accSIlya Dryomov u64 expected_write_size, 499d3798accSIlya Dryomov u32 flags); 500aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req, 501aca39d9eSLuís Henriques u64 src_snapid, u64 src_version, 502aca39d9eSLuís Henriques struct ceph_object_id *src_oid, 503aca39d9eSLuís Henriques struct ceph_object_locator *src_oloc, 504aca39d9eSLuís Henriques u32 src_fadvise_flags, 505aca39d9eSLuís Henriques u32 dst_fadvise_flags, 506aca39d9eSLuís Henriques u32 truncate_seq, u64 truncate_size, 507aca39d9eSLuís Henriques u8 copy_from_flags); 50833803f33SAlex Elder 5093d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 5103d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc, 511acead002SAlex Elder unsigned int num_ops, 5123d14c5d2SYehuda Sadeh bool use_mempool, 51354a54007SAlex Elder gfp_t gfp_flags); 51413d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); 5153d14c5d2SYehuda Sadeh 5163d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 5173d14c5d2SYehuda Sadeh struct ceph_file_layout *layout, 5183d14c5d2SYehuda Sadeh struct ceph_vino vino, 519acead002SAlex Elder u64 offset, u64 *len, 520715e4cd4SYan, Zheng unsigned int which, int num_ops, 521715e4cd4SYan, Zheng int opcode, int flags, 5223d14c5d2SYehuda Sadeh struct ceph_snap_context *snapc, 523acead002SAlex Elder u32 truncate_seq, u64 truncate_size, 524153e5167SAlex Elder bool use_mempool); 5253d14c5d2SYehuda Sadeh 526*a679e50fSJeff Layton int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); 527*a679e50fSJeff Layton 528*a679e50fSJeff Layton /* 529*a679e50fSJeff Layton * How big an extent array should we preallocate for a sparse read? This is 530*a679e50fSJeff Layton * just a starting value. If we get more than this back from the OSD, the 531*a679e50fSJeff Layton * receiver will reallocate. 532*a679e50fSJeff Layton */ 533*a679e50fSJeff Layton #define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 534*a679e50fSJeff Layton 535*a679e50fSJeff Layton static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) 536*a679e50fSJeff Layton { 537*a679e50fSJeff Layton return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); 538*a679e50fSJeff Layton } 539*a679e50fSJeff Layton 5409e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req); 5419e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req); 5423d14c5d2SYehuda Sadeh 543a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc, 544a8af0d68SJeff Layton struct ceph_osd_request *req); 545c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); 5463d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 5473d14c5d2SYehuda Sadeh struct ceph_osd_request *req); 5483d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc); 5493d14c5d2SYehuda Sadeh 550dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); 5517cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); 552dd935f44SJosh Durgin 553428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc, 554428a7158SDouglas Fuller struct ceph_object_id *oid, 555428a7158SDouglas Fuller struct ceph_object_locator *oloc, 556428a7158SDouglas Fuller const char *class, const char *method, 557428a7158SDouglas Fuller unsigned int flags, 558428a7158SDouglas Fuller struct page *req_page, size_t req_len, 55968ada915SIlya Dryomov struct page **resp_pages, size_t *resp_len); 560428a7158SDouglas Fuller 561922dab61SIlya Dryomov /* watch/notify */ 562922dab61SIlya Dryomov struct ceph_osd_linger_request * 563922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc, 564922dab61SIlya Dryomov struct ceph_object_id *oid, 565922dab61SIlya Dryomov struct ceph_object_locator *oloc, 566922dab61SIlya Dryomov rados_watchcb2_t wcb, 567922dab61SIlya Dryomov rados_watcherrcb_t errcb, 568922dab61SIlya Dryomov void *data); 569922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc, 570922dab61SIlya Dryomov struct ceph_osd_linger_request *lreq); 571922dab61SIlya Dryomov 572922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, 573922dab61SIlya Dryomov struct ceph_object_id *oid, 574922dab61SIlya Dryomov struct ceph_object_locator *oloc, 575922dab61SIlya Dryomov u64 notify_id, 576922dab61SIlya Dryomov u64 cookie, 577922dab61SIlya Dryomov void *payload, 5786d54228fSIlya Dryomov u32 payload_len); 57919079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc, 58019079203SIlya Dryomov struct ceph_object_id *oid, 58119079203SIlya Dryomov struct ceph_object_locator *oloc, 58219079203SIlya Dryomov void *payload, 5836d54228fSIlya Dryomov u32 payload_len, 58419079203SIlya Dryomov u32 timeout, 58519079203SIlya Dryomov struct page ***preply_pages, 58619079203SIlya Dryomov size_t *preply_len); 587b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc, 588b07d3c4bSIlya Dryomov struct ceph_osd_linger_request *lreq); 589a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, 590a4ed38d7SDouglas Fuller struct ceph_object_id *oid, 591a4ed38d7SDouglas Fuller struct ceph_object_locator *oloc, 592a4ed38d7SDouglas Fuller struct ceph_watch_item **watchers, 593a4ed38d7SDouglas Fuller u32 *num_watchers); 5943d14c5d2SYehuda Sadeh 595*a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */ 596*a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) 597*a679e50fSJeff Layton { 598*a679e50fSJeff Layton struct ceph_sparse_extent *ext; 599*a679e50fSJeff Layton 600*a679e50fSJeff Layton /* No extents? No data */ 601*a679e50fSJeff Layton if (op->extent.sparse_ext_cnt == 0) 602*a679e50fSJeff Layton return 0; 603*a679e50fSJeff Layton 604*a679e50fSJeff Layton ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; 605*a679e50fSJeff Layton 606*a679e50fSJeff Layton return ext->off + ext->len - op->extent.offset; 607*a679e50fSJeff Layton } 608*a679e50fSJeff Layton 609*a679e50fSJeff Layton #endif 610