1 #ifndef __EXTENTIO__ 2 #define __EXTENTIO__ 3 4 #include <linux/rbtree.h> 5 #include "ulist.h" 6 7 /* bits for the extent state */ 8 #define EXTENT_DIRTY (1U << 0) 9 #define EXTENT_WRITEBACK (1U << 1) 10 #define EXTENT_UPTODATE (1U << 2) 11 #define EXTENT_LOCKED (1U << 3) 12 #define EXTENT_NEW (1U << 4) 13 #define EXTENT_DELALLOC (1U << 5) 14 #define EXTENT_DEFRAG (1U << 6) 15 #define EXTENT_BOUNDARY (1U << 9) 16 #define EXTENT_NODATASUM (1U << 10) 17 #define EXTENT_DO_ACCOUNTING (1U << 11) 18 #define EXTENT_FIRST_DELALLOC (1U << 12) 19 #define EXTENT_NEED_WAIT (1U << 13) 20 #define EXTENT_DAMAGED (1U << 14) 21 #define EXTENT_NORESERVE (1U << 15) 22 #define EXTENT_QGROUP_RESERVED (1U << 16) 23 #define EXTENT_CLEAR_DATA_RESV (1U << 17) 24 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 25 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 26 27 /* 28 * flags for bio submission. The high bits indicate the compression 29 * type for this bio 30 */ 31 #define EXTENT_BIO_COMPRESSED 1 32 #define EXTENT_BIO_TREE_LOG 2 33 #define EXTENT_BIO_FLAG_SHIFT 16 34 35 /* these are bit numbers for test/set bit */ 36 #define EXTENT_BUFFER_UPTODATE 0 37 #define EXTENT_BUFFER_DIRTY 2 38 #define EXTENT_BUFFER_CORRUPT 3 39 #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ 40 #define EXTENT_BUFFER_TREE_REF 5 41 #define EXTENT_BUFFER_STALE 6 42 #define EXTENT_BUFFER_WRITEBACK 7 43 #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ 44 #define EXTENT_BUFFER_DUMMY 9 45 #define EXTENT_BUFFER_IN_TREE 10 46 #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ 47 48 /* these are flags for __process_pages_contig */ 49 #define PAGE_UNLOCK (1 << 0) 50 #define PAGE_CLEAR_DIRTY (1 << 1) 51 #define PAGE_SET_WRITEBACK (1 << 2) 52 #define PAGE_END_WRITEBACK (1 << 3) 53 #define PAGE_SET_PRIVATE2 (1 << 4) 54 #define PAGE_SET_ERROR (1 << 5) 55 #define PAGE_LOCK (1 << 6) 56 57 /* 58 * page->private values. Every page that is controlled by the extent 59 * map has page->private set to one. 60 */ 61 #define EXTENT_PAGE_PRIVATE 1 62 63 /* 64 * The extent buffer bitmap operations are done with byte granularity instead of 65 * word granularity for two reasons: 66 * 1. The bitmaps must be little-endian on disk. 67 * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a 68 * single word in a bitmap may straddle two pages in the extent buffer. 69 */ 70 #define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) 71 #define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) 72 #define BITMAP_FIRST_BYTE_MASK(start) \ 73 ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) 74 #define BITMAP_LAST_BYTE_MASK(nbits) \ 75 (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) 76 77 static inline int le_test_bit(int nr, const u8 *addr) 78 { 79 return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1))); 80 } 81 82 extern void le_bitmap_set(u8 *map, unsigned int start, int len); 83 extern void le_bitmap_clear(u8 *map, unsigned int start, int len); 84 85 struct extent_state; 86 struct btrfs_root; 87 struct btrfs_inode; 88 struct btrfs_io_bio; 89 struct io_failure_record; 90 91 typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, 92 int mirror_num, unsigned long bio_flags, 93 u64 bio_offset); 94 struct extent_io_ops { 95 /* 96 * The following callbacks must be allways defined, the function 97 * pointer will be called unconditionally. 98 */ 99 extent_submit_bio_hook_t *submit_bio_hook; 100 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, 101 struct page *page, u64 start, u64 end, 102 int mirror); 103 int (*merge_bio_hook)(struct page *page, unsigned long offset, 104 size_t size, struct bio *bio, 105 unsigned long bio_flags); 106 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 107 108 /* 109 * Optional hooks, called if the pointer is not NULL 110 */ 111 int (*fill_delalloc)(struct inode *inode, struct page *locked_page, 112 u64 start, u64 end, int *page_started, 113 unsigned long *nr_written); 114 115 int (*writepage_start_hook)(struct page *page, u64 start, u64 end); 116 void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 117 struct extent_state *state, int uptodate); 118 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 119 unsigned *bits); 120 void (*clear_bit_hook)(struct btrfs_inode *inode, 121 struct extent_state *state, 122 unsigned *bits); 123 void (*merge_extent_hook)(struct inode *inode, 124 struct extent_state *new, 125 struct extent_state *other); 126 void (*split_extent_hook)(struct inode *inode, 127 struct extent_state *orig, u64 split); 128 }; 129 130 struct extent_io_tree { 131 struct rb_root state; 132 struct address_space *mapping; 133 u64 dirty_bytes; 134 int track_uptodate; 135 spinlock_t lock; 136 const struct extent_io_ops *ops; 137 }; 138 139 struct extent_state { 140 u64 start; 141 u64 end; /* inclusive */ 142 struct rb_node rb_node; 143 144 /* ADD NEW ELEMENTS AFTER THIS */ 145 wait_queue_head_t wq; 146 atomic_t refs; 147 unsigned state; 148 149 struct io_failure_record *failrec; 150 151 #ifdef CONFIG_BTRFS_DEBUG 152 struct list_head leak_list; 153 #endif 154 }; 155 156 #define INLINE_EXTENT_BUFFER_PAGES 16 157 #define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE) 158 struct extent_buffer { 159 u64 start; 160 unsigned long len; 161 unsigned long bflags; 162 struct btrfs_fs_info *fs_info; 163 spinlock_t refs_lock; 164 atomic_t refs; 165 atomic_t io_pages; 166 int read_mirror; 167 struct rcu_head rcu_head; 168 pid_t lock_owner; 169 170 /* count of read lock holders on the extent buffer */ 171 atomic_t write_locks; 172 atomic_t read_locks; 173 atomic_t blocking_writers; 174 atomic_t blocking_readers; 175 atomic_t spinning_readers; 176 atomic_t spinning_writers; 177 short lock_nested; 178 /* >= 0 if eb belongs to a log tree, -1 otherwise */ 179 short log_index; 180 181 /* protects write locks */ 182 rwlock_t lock; 183 184 /* readers use lock_wq while they wait for the write 185 * lock holders to unlock 186 */ 187 wait_queue_head_t write_lock_wq; 188 189 /* writers use read_lock_wq while they wait for readers 190 * to unlock 191 */ 192 wait_queue_head_t read_lock_wq; 193 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 194 #ifdef CONFIG_BTRFS_DEBUG 195 struct list_head leak_list; 196 #endif 197 }; 198 199 /* 200 * Structure to record how many bytes and which ranges are set/cleared 201 */ 202 struct extent_changeset { 203 /* How many bytes are set/cleared in this operation */ 204 u64 bytes_changed; 205 206 /* Changed ranges */ 207 struct ulist range_changed; 208 }; 209 210 static inline void extent_set_compress_type(unsigned long *bio_flags, 211 int compress_type) 212 { 213 *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; 214 } 215 216 static inline int extent_compress_type(unsigned long bio_flags) 217 { 218 return bio_flags >> EXTENT_BIO_FLAG_SHIFT; 219 } 220 221 struct extent_map_tree; 222 223 typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, 224 struct page *page, 225 size_t pg_offset, 226 u64 start, u64 len, 227 int create); 228 229 void extent_io_tree_init(struct extent_io_tree *tree, 230 struct address_space *mapping); 231 int try_release_extent_mapping(struct extent_map_tree *map, 232 struct extent_io_tree *tree, struct page *page, 233 gfp_t mask); 234 int try_release_extent_buffer(struct page *page); 235 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 236 struct extent_state **cached); 237 238 static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) 239 { 240 return lock_extent_bits(tree, start, end, NULL); 241 } 242 243 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 244 int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 245 get_extent_t *get_extent, int mirror_num); 246 int __init extent_io_init(void); 247 void extent_io_exit(void); 248 249 u64 count_range_bits(struct extent_io_tree *tree, 250 u64 *start, u64 search_end, 251 u64 max_bytes, unsigned bits, int contig); 252 253 void free_extent_state(struct extent_state *state); 254 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 255 unsigned bits, int filled, 256 struct extent_state *cached_state); 257 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 258 unsigned bits, struct extent_changeset *changeset); 259 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 260 unsigned bits, int wake, int delete, 261 struct extent_state **cached, gfp_t mask); 262 263 static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) 264 { 265 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 266 GFP_NOFS); 267 } 268 269 static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, 270 u64 end, struct extent_state **cached, gfp_t mask) 271 { 272 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, 273 mask); 274 } 275 276 static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, 277 u64 end, unsigned bits) 278 { 279 int wake = 0; 280 281 if (bits & EXTENT_LOCKED) 282 wake = 1; 283 284 return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, 285 GFP_NOFS); 286 } 287 288 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 289 unsigned bits, struct extent_changeset *changeset); 290 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 291 unsigned bits, u64 *failed_start, 292 struct extent_state **cached_state, gfp_t mask); 293 294 static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, 295 u64 end, unsigned bits) 296 { 297 return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS); 298 } 299 300 static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 301 u64 end, struct extent_state **cached_state, gfp_t mask) 302 { 303 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, 304 cached_state, mask); 305 } 306 307 static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, 308 u64 end, gfp_t mask) 309 { 310 return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, 311 NULL, mask); 312 } 313 314 static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start, 315 u64 end) 316 { 317 return clear_extent_bit(tree, start, end, 318 EXTENT_DIRTY | EXTENT_DELALLOC | 319 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 320 } 321 322 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 323 unsigned bits, unsigned clear_bits, 324 struct extent_state **cached_state); 325 326 static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, 327 u64 end, struct extent_state **cached_state) 328 { 329 return set_extent_bit(tree, start, end, 330 EXTENT_DELALLOC | EXTENT_UPTODATE, 331 NULL, cached_state, GFP_NOFS); 332 } 333 334 static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start, 335 u64 end, struct extent_state **cached_state) 336 { 337 return set_extent_bit(tree, start, end, 338 EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, 339 NULL, cached_state, GFP_NOFS); 340 } 341 342 static inline int set_extent_new(struct extent_io_tree *tree, u64 start, 343 u64 end) 344 { 345 return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, 346 GFP_NOFS); 347 } 348 349 static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, 350 u64 end, struct extent_state **cached_state, gfp_t mask) 351 { 352 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL, 353 cached_state, mask); 354 } 355 356 int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 357 u64 *start_ret, u64 *end_ret, unsigned bits, 358 struct extent_state **cached_state); 359 int extent_invalidatepage(struct extent_io_tree *tree, 360 struct page *page, unsigned long offset); 361 int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 362 get_extent_t *get_extent, 363 struct writeback_control *wbc); 364 int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, 365 u64 start, u64 end, get_extent_t *get_extent, 366 int mode); 367 int extent_writepages(struct extent_io_tree *tree, 368 struct address_space *mapping, 369 get_extent_t *get_extent, 370 struct writeback_control *wbc); 371 int btree_write_cache_pages(struct address_space *mapping, 372 struct writeback_control *wbc); 373 int extent_readpages(struct extent_io_tree *tree, 374 struct address_space *mapping, 375 struct list_head *pages, unsigned nr_pages, 376 get_extent_t get_extent); 377 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 378 __u64 start, __u64 len, get_extent_t *get_extent); 379 void set_page_extent_mapped(struct page *page); 380 381 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 382 u64 start); 383 struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 384 u64 start, unsigned long len); 385 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 386 u64 start); 387 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); 388 struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, 389 u64 start); 390 void free_extent_buffer(struct extent_buffer *eb); 391 void free_extent_buffer_stale(struct extent_buffer *eb); 392 #define WAIT_NONE 0 393 #define WAIT_COMPLETE 1 394 #define WAIT_PAGE_LOCK 2 395 int read_extent_buffer_pages(struct extent_io_tree *tree, 396 struct extent_buffer *eb, int wait, 397 get_extent_t *get_extent, int mirror_num); 398 void wait_on_extent_buffer_writeback(struct extent_buffer *eb); 399 400 static inline unsigned long num_extent_pages(u64 start, u64 len) 401 { 402 return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 403 (start >> PAGE_SHIFT); 404 } 405 406 static inline void extent_buffer_get(struct extent_buffer *eb) 407 { 408 atomic_inc(&eb->refs); 409 } 410 411 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 412 unsigned long start, 413 unsigned long len); 414 void read_extent_buffer(struct extent_buffer *eb, void *dst, 415 unsigned long start, 416 unsigned long len); 417 int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, 418 unsigned long start, 419 unsigned long len); 420 void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src); 421 void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb, 422 const void *src); 423 void write_extent_buffer(struct extent_buffer *eb, const void *src, 424 unsigned long start, unsigned long len); 425 void copy_extent_buffer_full(struct extent_buffer *dst, 426 struct extent_buffer *src); 427 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 428 unsigned long dst_offset, unsigned long src_offset, 429 unsigned long len); 430 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 431 unsigned long src_offset, unsigned long len); 432 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 433 unsigned long src_offset, unsigned long len); 434 void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start, 435 unsigned long len); 436 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, 437 unsigned long pos); 438 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, 439 unsigned long pos, unsigned long len); 440 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, 441 unsigned long pos, unsigned long len); 442 void clear_extent_buffer_dirty(struct extent_buffer *eb); 443 int set_extent_buffer_dirty(struct extent_buffer *eb); 444 void set_extent_buffer_uptodate(struct extent_buffer *eb); 445 void clear_extent_buffer_uptodate(struct extent_buffer *eb); 446 int extent_buffer_uptodate(struct extent_buffer *eb); 447 int extent_buffer_under_io(struct extent_buffer *eb); 448 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 449 unsigned long min_len, char **map, 450 unsigned long *map_start, 451 unsigned long *map_len); 452 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 453 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 454 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, 455 u64 delalloc_end, struct page *locked_page, 456 unsigned bits_to_clear, 457 unsigned long page_ops); 458 struct bio * 459 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 460 gfp_t gfp_flags); 461 struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); 462 struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); 463 464 struct btrfs_fs_info; 465 struct btrfs_inode; 466 467 int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, 468 u64 logical, struct page *page, 469 unsigned int pg_offset, int mirror_num); 470 int clean_io_failure(struct btrfs_inode *inode, u64 start, 471 struct page *page, unsigned int pg_offset); 472 void end_extent_writepage(struct page *page, int err, u64 start, u64 end); 473 int repair_eb_io_failure(struct btrfs_fs_info *fs_info, 474 struct extent_buffer *eb, int mirror_num); 475 476 /* 477 * When IO fails, either with EIO or csum verification fails, we 478 * try other mirrors that might have a good copy of the data. This 479 * io_failure_record is used to record state as we go through all the 480 * mirrors. If another mirror has good data, the page is set up to date 481 * and things continue. If a good mirror can't be found, the original 482 * bio end_io callback is called to indicate things have failed. 483 */ 484 struct io_failure_record { 485 struct page *page; 486 u64 start; 487 u64 len; 488 u64 logical; 489 unsigned long bio_flags; 490 int this_mirror; 491 int failed_mirror; 492 int in_validation; 493 }; 494 495 496 void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, 497 u64 end); 498 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, 499 struct io_failure_record **failrec_ret); 500 int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, 501 struct io_failure_record *failrec, int fail_mirror); 502 struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, 503 struct io_failure_record *failrec, 504 struct page *page, int pg_offset, int icsum, 505 bio_end_io_t *endio_func, void *data); 506 int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec); 507 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 508 noinline u64 find_lock_delalloc_range(struct inode *inode, 509 struct extent_io_tree *tree, 510 struct page *locked_page, u64 *start, 511 u64 *end, u64 max_bytes); 512 #endif 513 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, 514 u64 start); 515 #endif 516