1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #ifndef BTRFS_VOLUMES_H 7 #define BTRFS_VOLUMES_H 8 9 #include <linux/bio.h> 10 #include <linux/sort.h> 11 #include <linux/btrfs.h> 12 #include "async-thread.h" 13 14 #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) 15 16 extern struct mutex uuid_mutex; 17 18 #define BTRFS_STRIPE_LEN SZ_64K 19 20 struct buffer_head; 21 struct btrfs_pending_bios { 22 struct bio *head; 23 struct bio *tail; 24 }; 25 26 /* 27 * Use sequence counter to get consistent device stat data on 28 * 32-bit processors. 29 */ 30 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 31 #include <linux/seqlock.h> 32 #define __BTRFS_NEED_DEVICE_DATA_ORDERED 33 #define btrfs_device_data_ordered_init(device) \ 34 seqcount_init(&device->data_seqcount) 35 #else 36 #define btrfs_device_data_ordered_init(device) do { } while (0) 37 #endif 38 39 #define BTRFS_DEV_STATE_WRITEABLE (0) 40 #define BTRFS_DEV_STATE_IN_FS_METADATA (1) 41 #define BTRFS_DEV_STATE_MISSING (2) 42 #define BTRFS_DEV_STATE_REPLACE_TGT (3) 43 #define BTRFS_DEV_STATE_FLUSH_SENT (4) 44 45 struct btrfs_device { 46 struct list_head dev_list; 47 struct list_head dev_alloc_list; 48 struct btrfs_fs_devices *fs_devices; 49 struct btrfs_fs_info *fs_info; 50 51 struct rcu_string *name; 52 53 u64 generation; 54 55 spinlock_t io_lock ____cacheline_aligned; 56 int running_pending; 57 /* regular prio bios */ 58 struct btrfs_pending_bios pending_bios; 59 /* sync bios */ 60 struct btrfs_pending_bios pending_sync_bios; 61 62 struct block_device *bdev; 63 64 /* the mode sent to blkdev_get */ 65 fmode_t mode; 66 67 unsigned long dev_state; 68 blk_status_t last_flush_error; 69 int flush_bio_sent; 70 71 #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED 72 seqcount_t data_seqcount; 73 #endif 74 75 /* the internal btrfs device id */ 76 u64 devid; 77 78 /* size of the device in memory */ 79 u64 total_bytes; 80 81 /* size of the device on disk */ 82 u64 disk_total_bytes; 83 84 /* bytes used */ 85 u64 bytes_used; 86 87 /* optimal io alignment for this device */ 88 u32 io_align; 89 90 /* optimal io width for this device */ 91 u32 io_width; 92 /* type and info about this device */ 93 u64 type; 94 95 /* minimal io size for this device */ 96 u32 sector_size; 97 98 /* physical drive uuid (or lvm uuid) */ 99 u8 uuid[BTRFS_UUID_SIZE]; 100 101 /* 102 * size of the device on the current transaction 103 * 104 * This variant is update when committing the transaction, 105 * and protected by device_list_mutex 106 */ 107 u64 commit_total_bytes; 108 109 /* bytes used on the current transaction */ 110 u64 commit_bytes_used; 111 /* 112 * used to manage the device which is resized 113 * 114 * It is protected by chunk_lock. 115 */ 116 struct list_head resized_list; 117 118 /* for sending down flush barriers */ 119 struct bio *flush_bio; 120 struct completion flush_wait; 121 122 /* per-device scrub information */ 123 struct scrub_ctx *scrub_ctx; 124 125 struct btrfs_work work; 126 struct rcu_head rcu; 127 128 /* readahead state */ 129 atomic_t reada_in_flight; 130 u64 reada_next; 131 struct reada_zone *reada_curr_zone; 132 struct radix_tree_root reada_zones; 133 struct radix_tree_root reada_extents; 134 135 /* disk I/O failure stats. For detailed description refer to 136 * enum btrfs_dev_stat_values in ioctl.h */ 137 int dev_stats_valid; 138 139 /* Counter to record the change of device stats */ 140 atomic_t dev_stats_ccnt; 141 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; 142 }; 143 144 /* 145 * If we read those variants at the context of their own lock, we needn't 146 * use the following helpers, reading them directly is safe. 147 */ 148 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 149 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 150 static inline u64 \ 151 btrfs_device_get_##name(const struct btrfs_device *dev) \ 152 { \ 153 u64 size; \ 154 unsigned int seq; \ 155 \ 156 do { \ 157 seq = read_seqcount_begin(&dev->data_seqcount); \ 158 size = dev->name; \ 159 } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ 160 return size; \ 161 } \ 162 \ 163 static inline void \ 164 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 165 { \ 166 preempt_disable(); \ 167 write_seqcount_begin(&dev->data_seqcount); \ 168 dev->name = size; \ 169 write_seqcount_end(&dev->data_seqcount); \ 170 preempt_enable(); \ 171 } 172 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 173 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 174 static inline u64 \ 175 btrfs_device_get_##name(const struct btrfs_device *dev) \ 176 { \ 177 u64 size; \ 178 \ 179 preempt_disable(); \ 180 size = dev->name; \ 181 preempt_enable(); \ 182 return size; \ 183 } \ 184 \ 185 static inline void \ 186 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 187 { \ 188 preempt_disable(); \ 189 dev->name = size; \ 190 preempt_enable(); \ 191 } 192 #else 193 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 194 static inline u64 \ 195 btrfs_device_get_##name(const struct btrfs_device *dev) \ 196 { \ 197 return dev->name; \ 198 } \ 199 \ 200 static inline void \ 201 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 202 { \ 203 dev->name = size; \ 204 } 205 #endif 206 207 BTRFS_DEVICE_GETSET_FUNCS(total_bytes); 208 BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); 209 BTRFS_DEVICE_GETSET_FUNCS(bytes_used); 210 211 struct btrfs_fs_devices { 212 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ 213 u8 metadata_uuid[BTRFS_FSID_SIZE]; 214 bool fsid_change; 215 struct list_head fs_list; 216 217 u64 num_devices; 218 u64 open_devices; 219 u64 rw_devices; 220 u64 missing_devices; 221 u64 total_rw_bytes; 222 u64 total_devices; 223 224 /* Highest generation number of seen devices */ 225 u64 latest_generation; 226 227 struct block_device *latest_bdev; 228 229 /* all of the devices in the FS, protected by a mutex 230 * so we can safely walk it to write out the supers without 231 * worrying about add/remove by the multi-device code. 232 * Scrubbing super can kick off supers writing by holding 233 * this mutex lock. 234 */ 235 struct mutex device_list_mutex; 236 struct list_head devices; 237 238 struct list_head resized_devices; 239 /* devices not currently being allocated */ 240 struct list_head alloc_list; 241 242 struct btrfs_fs_devices *seed; 243 int seeding; 244 245 int opened; 246 247 /* set when we find or add a device that doesn't have the 248 * nonrot flag set 249 */ 250 int rotating; 251 252 struct btrfs_fs_info *fs_info; 253 /* sysfs kobjects */ 254 struct kobject fsid_kobj; 255 struct kobject *device_dir_kobj; 256 struct completion kobj_unregister; 257 }; 258 259 #define BTRFS_BIO_INLINE_CSUM_SIZE 64 260 261 /* 262 * we need the mirror number and stripe index to be passed around 263 * the call chain while we are processing end_io (especially errors). 264 * Really, what we need is a btrfs_bio structure that has this info 265 * and is properly sized with its stripe array, but we're not there 266 * quite yet. We have our own btrfs bioset, and all of the bios 267 * we allocate are actually btrfs_io_bios. We'll cram as much of 268 * struct btrfs_bio as we can into this over time. 269 */ 270 struct btrfs_io_bio { 271 unsigned int mirror_num; 272 unsigned int stripe_index; 273 u64 logical; 274 u8 *csum; 275 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; 276 struct bvec_iter iter; 277 /* 278 * This member must come last, bio_alloc_bioset will allocate enough 279 * bytes for entire btrfs_io_bio but relies on bio being last. 280 */ 281 struct bio bio; 282 }; 283 284 static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) 285 { 286 return container_of(bio, struct btrfs_io_bio, bio); 287 } 288 289 static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio) 290 { 291 if (io_bio->csum != io_bio->csum_inline) { 292 kfree(io_bio->csum); 293 io_bio->csum = NULL; 294 } 295 } 296 297 struct btrfs_bio_stripe { 298 struct btrfs_device *dev; 299 u64 physical; 300 u64 length; /* only used for discard mappings */ 301 }; 302 303 struct btrfs_bio { 304 refcount_t refs; 305 atomic_t stripes_pending; 306 struct btrfs_fs_info *fs_info; 307 u64 map_type; /* get from map_lookup->type */ 308 bio_end_io_t *end_io; 309 struct bio *orig_bio; 310 unsigned long flags; 311 void *private; 312 atomic_t error; 313 int max_errors; 314 int num_stripes; 315 int mirror_num; 316 int num_tgtdevs; 317 int *tgtdev_map; 318 /* 319 * logical block numbers for the start of each stripe 320 * The last one or two are p/q. These are sorted, 321 * so raid_map[0] is the start of our full stripe 322 */ 323 u64 *raid_map; 324 struct btrfs_bio_stripe stripes[]; 325 }; 326 327 struct btrfs_device_info { 328 struct btrfs_device *dev; 329 u64 dev_offset; 330 u64 max_avail; 331 u64 total_avail; 332 }; 333 334 struct btrfs_raid_attr { 335 int sub_stripes; /* sub_stripes info for map */ 336 int dev_stripes; /* stripes per dev */ 337 int devs_max; /* max devs to use */ 338 int devs_min; /* min devs needed */ 339 int tolerated_failures; /* max tolerated fail devs */ 340 int devs_increment; /* ndevs has to be a multiple of this */ 341 int ncopies; /* how many copies to data has */ 342 int nparity; /* number of stripes worth of bytes to store 343 * parity information */ 344 int mindev_error; /* error code if min devs requisite is unmet */ 345 const char raid_name[8]; /* name of the raid */ 346 u64 bg_flag; /* block group flag of the raid */ 347 }; 348 349 extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; 350 351 struct map_lookup { 352 u64 type; 353 int io_align; 354 int io_width; 355 u64 stripe_len; 356 int num_stripes; 357 int sub_stripes; 358 int verified_stripes; /* For mount time dev extent verification */ 359 struct btrfs_bio_stripe stripes[]; 360 }; 361 362 #define map_lookup_size(n) (sizeof(struct map_lookup) + \ 363 (sizeof(struct btrfs_bio_stripe) * (n))) 364 365 struct btrfs_balance_args; 366 struct btrfs_balance_progress; 367 struct btrfs_balance_control { 368 struct btrfs_balance_args data; 369 struct btrfs_balance_args meta; 370 struct btrfs_balance_args sys; 371 372 u64 flags; 373 374 struct btrfs_balance_progress stat; 375 }; 376 377 enum btrfs_map_op { 378 BTRFS_MAP_READ, 379 BTRFS_MAP_WRITE, 380 BTRFS_MAP_DISCARD, 381 BTRFS_MAP_GET_READ_MIRRORS, 382 }; 383 384 static inline enum btrfs_map_op btrfs_op(struct bio *bio) 385 { 386 switch (bio_op(bio)) { 387 case REQ_OP_DISCARD: 388 return BTRFS_MAP_DISCARD; 389 case REQ_OP_WRITE: 390 return BTRFS_MAP_WRITE; 391 default: 392 WARN_ON_ONCE(1); 393 case REQ_OP_READ: 394 return BTRFS_MAP_READ; 395 } 396 } 397 398 void btrfs_get_bbio(struct btrfs_bio *bbio); 399 void btrfs_put_bbio(struct btrfs_bio *bbio); 400 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 401 u64 logical, u64 *length, 402 struct btrfs_bio **bbio_ret, int mirror_num); 403 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 404 u64 logical, u64 *length, 405 struct btrfs_bio **bbio_ret); 406 int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, 407 u64 physical, u64 **logical, int *naddrs, int *stripe_len); 408 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); 409 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 410 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); 411 void btrfs_mapping_init(struct btrfs_mapping_tree *tree); 412 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); 413 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 414 int mirror_num, int async_submit); 415 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 416 fmode_t flags, void *holder); 417 struct btrfs_device *btrfs_scan_one_device(const char *path, 418 fmode_t flags, void *holder); 419 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 420 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); 421 void btrfs_assign_next_active_device(struct btrfs_device *device, 422 struct btrfs_device *this_dev); 423 struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, 424 u64 devid, 425 const char *devpath); 426 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, 427 const u64 *devid, 428 const u8 *uuid); 429 void btrfs_free_device(struct btrfs_device *device); 430 int btrfs_rm_device(struct btrfs_fs_info *fs_info, 431 const char *device_path, u64 devid); 432 void __exit btrfs_cleanup_fs_uuids(void); 433 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 434 int btrfs_grow_device(struct btrfs_trans_handle *trans, 435 struct btrfs_device *device, u64 new_size); 436 struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, 437 u8 *uuid, u8 *fsid); 438 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 439 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); 440 int btrfs_balance(struct btrfs_fs_info *fs_info, 441 struct btrfs_balance_control *bctl, 442 struct btrfs_ioctl_balance_args *bargs); 443 void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf); 444 int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); 445 int btrfs_recover_balance(struct btrfs_fs_info *fs_info); 446 int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 447 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 448 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); 449 int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); 450 int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset); 451 int find_free_dev_extent_start(struct btrfs_transaction *transaction, 452 struct btrfs_device *device, u64 num_bytes, 453 u64 search_start, u64 *start, u64 *max_avail); 454 int find_free_dev_extent(struct btrfs_trans_handle *trans, 455 struct btrfs_device *device, u64 num_bytes, 456 u64 *start, u64 *max_avail); 457 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 458 int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, 459 struct btrfs_ioctl_get_dev_stats *stats); 460 void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); 461 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); 462 int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, 463 struct btrfs_fs_info *fs_info); 464 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); 465 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, 466 struct btrfs_device *srcdev); 467 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); 468 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); 469 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 470 u64 logical, u64 len); 471 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 472 u64 logical); 473 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 474 u64 chunk_offset, u64 chunk_size); 475 int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); 476 struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, 477 u64 logical, u64 length); 478 479 static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, 480 int index) 481 { 482 atomic_inc(dev->dev_stat_values + index); 483 /* 484 * This memory barrier orders stores updating statistics before stores 485 * updating dev_stats_ccnt. 486 * 487 * It pairs with smp_rmb() in btrfs_run_dev_stats(). 488 */ 489 smp_mb__before_atomic(); 490 atomic_inc(&dev->dev_stats_ccnt); 491 } 492 493 static inline int btrfs_dev_stat_read(struct btrfs_device *dev, 494 int index) 495 { 496 return atomic_read(dev->dev_stat_values + index); 497 } 498 499 static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, 500 int index) 501 { 502 int ret; 503 504 ret = atomic_xchg(dev->dev_stat_values + index, 0); 505 /* 506 * atomic_xchg implies a full memory barriers as per atomic_t.txt: 507 * - RMW operations that have a return value are fully ordered; 508 * 509 * This implicit memory barriers is paired with the smp_rmb in 510 * btrfs_run_dev_stats 511 */ 512 atomic_inc(&dev->dev_stats_ccnt); 513 return ret; 514 } 515 516 static inline void btrfs_dev_stat_set(struct btrfs_device *dev, 517 int index, unsigned long val) 518 { 519 atomic_set(dev->dev_stat_values + index, val); 520 /* 521 * This memory barrier orders stores updating statistics before stores 522 * updating dev_stats_ccnt. 523 * 524 * It pairs with smp_rmb() in btrfs_run_dev_stats(). 525 */ 526 smp_mb__before_atomic(); 527 atomic_inc(&dev->dev_stats_ccnt); 528 } 529 530 static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, 531 int index) 532 { 533 btrfs_dev_stat_set(dev, index, 0); 534 } 535 536 /* 537 * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which 538 * can be used as index to access btrfs_raid_array[]. 539 */ 540 static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags) 541 { 542 if (flags & BTRFS_BLOCK_GROUP_RAID10) 543 return BTRFS_RAID_RAID10; 544 else if (flags & BTRFS_BLOCK_GROUP_RAID1) 545 return BTRFS_RAID_RAID1; 546 else if (flags & BTRFS_BLOCK_GROUP_DUP) 547 return BTRFS_RAID_DUP; 548 else if (flags & BTRFS_BLOCK_GROUP_RAID0) 549 return BTRFS_RAID_RAID0; 550 else if (flags & BTRFS_BLOCK_GROUP_RAID5) 551 return BTRFS_RAID_RAID5; 552 else if (flags & BTRFS_BLOCK_GROUP_RAID6) 553 return BTRFS_RAID_RAID6; 554 555 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 556 } 557 558 const char *get_raid_name(enum btrfs_raid_types type); 559 560 void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); 561 void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans); 562 563 struct list_head *btrfs_get_fs_uuids(void); 564 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); 565 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); 566 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, 567 struct btrfs_device *failing_dev); 568 569 int btrfs_bg_type_to_factor(u64 flags); 570 int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); 571 572 #endif 573