/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H

#include <linux/bio.h>
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"

#define BTRFS_MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)

extern struct mutex uuid_mutex;

#define BTRFS_STRIPE_LEN	SZ_64K

struct buffer_head;
struct btrfs_pending_bios {
	struct bio *head;
	struct bio *tail;
};

/*
 * Use a sequence counter to get consistent device stat data on
 * 32-bit processors.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device)	\
	seqcount_init(&device->data_seqcount)
#else
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif

#define BTRFS_DEV_STATE_WRITEABLE	(0)
#define BTRFS_DEV_STATE_IN_FS_METADATA	(1)
#define BTRFS_DEV_STATE_MISSING		(2)
#define BTRFS_DEV_STATE_REPLACE_TGT	(3)
#define BTRFS_DEV_STATE_FLUSH_SENT	(4)

struct btrfs_device {
	struct list_head dev_list;
	struct list_head dev_alloc_list;
	struct list_head post_commit_list; /* chunk mutex */
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_fs_info *fs_info;

	struct rcu_string *name;

	u64 generation;

	spinlock_t io_lock ____cacheline_aligned;
	int running_pending;
	/* regular prio bios */
	struct btrfs_pending_bios pending_bios;
	/* sync bios */
	struct btrfs_pending_bios pending_sync_bios;

	struct block_device *bdev;

	/* the mode sent to blkdev_get */
	fmode_t mode;

	unsigned long dev_state;
	blk_status_t last_flush_error;
	int flush_bio_sent;

#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
	seqcount_t data_seqcount;
#endif

	/* the internal btrfs device id */
	u64 devid;

	/* size of the device in memory */
	u64 total_bytes;

	/* size of the device on disk */
	u64 disk_total_bytes;

	/* bytes used */
	u64 bytes_used;

	/* optimal io alignment for this device */
	u32 io_align;

	/* optimal io width for this device */
	u32 io_width;
	/* type and info about this device */
	u64 type;

	/* minimal io size for this device */
	u32 sector_size;

	/* physical drive uuid (or lvm uuid) */
	u8 uuid[BTRFS_UUID_SIZE];

	/*
	 * size of the device on the current transaction
	 *
	 * This variable is updated when committing the transaction,
	 * and is protected by the chunk mutex
	 */
	u64 commit_total_bytes;

	/* bytes used on the current transaction */
	u64 commit_bytes_used;

	/* for sending down flush barriers */
	struct bio *flush_bio;
	struct completion flush_wait;

	/* per-device scrub information */
	struct scrub_ctx *scrub_ctx;

	struct btrfs_work work;

	/* readahead state */
	atomic_t reada_in_flight;
	u64 reada_next;
	struct reada_zone *reada_curr_zone;
	struct radix_tree_root reada_zones;
	struct radix_tree_root reada_extents;

	/* disk I/O failure stats. For a detailed description refer to
	 * enum btrfs_dev_stat_values in ioctl.h */
	int dev_stats_valid;

	/* Counter to record changes of the device stats */
	atomic_t dev_stats_ccnt;
	atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];

	struct extent_io_tree alloc_state;
};
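
/*
 * The BTRFS_DEV_STATE_* values above are bit numbers, not masks, and are
 * applied to the dev_state word with the standard bitops. A minimal
 * illustrative sketch, not a call sequence taken from this header (device
 * is assumed to be a struct btrfs_device *):
 *
 *	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 *	    !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
 *		set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
 */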

/*
 * If we read these variables within the context of their own lock, we don't
 * need the following helpers; reading them directly is safe.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	u64 size;							\
	unsigned int seq;						\
									\
	do {								\
		seq = read_seqcount_begin(&dev->data_seqcount);		\
		size = dev->name;					\
	} while (read_seqcount_retry(&dev->data_seqcount, seq));	\
	return size;							\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	preempt_disable();						\
	write_seqcount_begin(&dev->data_seqcount);			\
	dev->name = size;						\
	write_seqcount_end(&dev->data_seqcount);			\
	preempt_enable();						\
}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	u64 size;							\
									\
	preempt_disable();						\
	size = dev->name;						\
	preempt_enable();						\
	return size;							\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	preempt_disable();						\
	dev->name = size;						\
	preempt_enable();						\
}
#else
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	return dev->name;						\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	dev->name = size;						\
}
#endif

BTRFS_DEVICE_GETSET_FUNCS(total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
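
/*
 * The instantiations above expand into accessors such as
 * btrfs_device_get_total_bytes() and btrfs_device_set_total_bytes(). A
 * minimal illustrative sketch of the intended use (device is assumed to be
 * a struct btrfs_device *): on 32-bit SMP a plain 64-bit load could tear
 * against a concurrent store, so the getter retries via the seqcount:
 *
 *	u64 total = btrfs_device_get_total_bytes(device);
 *
 *	btrfs_device_set_total_bytes(device, total + SZ_1G);
 */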

struct btrfs_fs_devices {
	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
	u8 metadata_uuid[BTRFS_FSID_SIZE];
	bool fsid_change;
	struct list_head fs_list;

	u64 num_devices;
	u64 open_devices;
	u64 rw_devices;
	u64 missing_devices;
	u64 total_rw_bytes;
	u64 total_devices;

	/* Highest generation number of seen devices */
	u64 latest_generation;

	struct block_device *latest_bdev;

	/* All of the devices in the FS, protected by a mutex so we can
	 * safely walk it to write out the supers without worrying about
	 * adds/removes by the multi-device code. Scrubbing the super can
	 * kick off writing of the supers while holding this mutex lock.
	 */
	struct mutex device_list_mutex;
	struct list_head devices;

	/* devices not currently being allocated */
	struct list_head alloc_list;

	struct btrfs_fs_devices *seed;
	int seeding;

	int opened;

	/* set when we find or add a device that doesn't have the
	 * nonrot flag set
	 */
	int rotating;

	struct btrfs_fs_info *fs_info;
	/* sysfs kobjects */
	struct kobject fsid_kobj;
	struct kobject *device_dir_kobj;
	struct completion kobj_unregister;
};

#define BTRFS_BIO_INLINE_CSUM_SIZE	64

#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)	\
			- sizeof(struct btrfs_chunk))		\
			/ sizeof(struct btrfs_stripe) + 1)

#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	\
				- 2 * sizeof(struct btrfs_disk_key)	\
				- 2 * sizeof(struct btrfs_chunk))	\
				/ sizeof(struct btrfs_stripe) + 1)

/*
 * We need the mirror number and stripe index to be passed around
 * the call chain while we are processing end_io (especially errors).
 * Really, what we need is a btrfs_bio structure that has this info
 * and is properly sized with its stripe array, but we're not there
 * quite yet. We have our own btrfs bioset, and all of the bios
 * we allocate are actually btrfs_io_bios. We'll cram as much of
 * struct btrfs_bio as we can into this over time.
 */
struct btrfs_io_bio {
	unsigned int mirror_num;
	unsigned int stripe_index;
	u64 logical;
	u8 *csum;
	u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
	struct bvec_iter iter;
	/*
	 * This member must come last, bio_alloc_bioset will allocate enough
	 * bytes for the entire btrfs_io_bio but relies on bio being last.
	 */
	struct bio bio;
};

static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
{
	return container_of(bio, struct btrfs_io_bio, bio);
}

static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio)
{
	if (io_bio->csum != io_bio->csum_inline) {
		kfree(io_bio->csum);
		io_bio->csum = NULL;
	}
}
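
/*
 * Because the embedded bio is the last member, a completion handler that is
 * handed a plain struct bio can recover the btrfs_io_bio wrapper with
 * btrfs_io_bio() above, i.e. a plain container_of(). A minimal illustrative
 * sketch; my_end_io() is a hypothetical handler, not one declared here:
 *
 *	static void my_end_io(struct bio *bio)
 *	{
 *		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 *
 *		pr_debug("bio for mirror %u done\n", io_bio->mirror_num);
 *		btrfs_io_bio_free_csum(io_bio);
 *		bio_put(bio);
 *	}
 */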

struct btrfs_bio_stripe {
	struct btrfs_device *dev;
	u64 physical;
	u64 length; /* only used for discard mappings */
};

struct btrfs_bio {
	refcount_t refs;
	atomic_t stripes_pending;
	struct btrfs_fs_info *fs_info;
	u64 map_type; /* get from map_lookup->type */
	bio_end_io_t *end_io;
	struct bio *orig_bio;
	unsigned long flags;
	void *private;
	atomic_t error;
	int max_errors;
	int num_stripes;
	int mirror_num;
	int num_tgtdevs;
	int *tgtdev_map;
	/*
	 * Logical block numbers for the start of each stripe. The last one
	 * or two are p/q. These are sorted, so raid_map[0] is the start of
	 * our full stripe.
	 */
	u64 *raid_map;
	struct btrfs_bio_stripe stripes[];
};

struct btrfs_device_info {
	struct btrfs_device *dev;
	u64 dev_offset;
	u64 max_avail;
	u64 total_avail;
};

struct btrfs_raid_attr {
	int sub_stripes;	/* sub_stripes info for map */
	int dev_stripes;	/* stripes per dev */
	int devs_max;		/* max devs to use */
	int devs_min;		/* min devs needed */
	int tolerated_failures; /* max tolerated fail devs */
	int devs_increment;	/* ndevs has to be a multiple of this */
	int ncopies;		/* how many copies the data has */
	int nparity;		/* number of stripes worth of bytes to store
				 * parity information */
	int mindev_error;	/* error code if the min devs requirement is
				 * not met */
	const char raid_name[8]; /* name of the raid */
	u64 bg_flag;		/* block group flag of the raid */
};

extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];

struct map_lookup {
	u64 type;
	int io_align;
	int io_width;
	u64 stripe_len;
	int num_stripes;
	int sub_stripes;
	int verified_stripes; /* For mount time dev extent verification */
	struct btrfs_bio_stripe stripes[];
};

#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
	struct btrfs_balance_args data;
	struct btrfs_balance_args meta;
	struct btrfs_balance_args sys;

	u64 flags;

	struct btrfs_balance_progress stat;
};

enum btrfs_map_op {
	BTRFS_MAP_READ,
	BTRFS_MAP_WRITE,
	BTRFS_MAP_DISCARD,
	BTRFS_MAP_GET_READ_MIRRORS,
};

static inline enum btrfs_map_op btrfs_op(struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
		return BTRFS_MAP_DISCARD;
	case REQ_OP_WRITE:
		return BTRFS_MAP_WRITE;
	default:
		WARN_ON_ONCE(1);
		/* fall through */
	case REQ_OP_READ:
		return BTRFS_MAP_READ;
	}
}
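
/*
 * A minimal illustrative sketch of combining btrfs_op() with the mapping
 * API declared below (fs_info, bio, logical and ret are assumed to be set
 * up by the caller):
 *
 *	struct btrfs_bio *bbio = NULL;
 *	u64 length = fs_info->sectorsize;
 *
 *	ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &length,
 *			      &bbio, 0);
 *	if (!ret)
 *		btrfs_put_bbio(bbio);
 */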

void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
		    u64 logical, u64 *length,
		    struct btrfs_bio **bbio_ret, int mirror_num);
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
		     u64 logical, u64 *length,
		     struct btrfs_bio **bbio_ret);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
			   int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path,
					   fmode_t flags, void *holder);
int btrfs_forget_devices(const char *path);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
void btrfs_assign_next_active_device(struct btrfs_device *device,
				     struct btrfs_device *this_dev);
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
						  u64 devid,
						  const char *devpath);
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
					const u64 *devid,
					const u8 *uuid);
void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
		    const char *device_path, u64 devid);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size);
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
				       u64 devid, u8 *uuid, u8 *fsid, bool seed);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
int btrfs_balance(struct btrfs_fs_info *fs_info,
		  struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs);
void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
			       u64 search_start, u64 *start, u64 *max_avail);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *max_avail);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			struct btrfs_ioctl_get_dev_stats *stats);
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
			   u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
				    u64 logical);
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
			     u64 chunk_offset, u64 chunk_size);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
				       u64 logical, u64 length);

static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
				      int index)
{
	atomic_inc(dev->dev_stat_values + index);
	/*
	 * This memory barrier orders stores updating statistics before stores
	 * updating dev_stats_ccnt.
	 *
	 * It pairs with smp_rmb() in btrfs_run_dev_stats().
	 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}
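
/*
 * dev_stats_ccnt is bumped after every stat update, so a reader such as
 * btrfs_run_dev_stats() can sample it, copy the values, and re-check it to
 * detect whether anything changed in between. A minimal illustrative
 * sketch of that reader-side pattern, not the exact body of
 * btrfs_run_dev_stats(); the smp_rmb() pairs with the
 * smp_mb__before_atomic() above:
 *
 *	int ccnt = atomic_read(&dev->dev_stats_ccnt);
 *
 *	smp_rmb();
 *	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
 *		stats->values[i] = btrfs_dev_stat_read(dev, i);
 *	if (ccnt != atomic_read(&dev->dev_stats_ccnt))
 *		goto again;
 */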

static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
				      int index)
{
	return atomic_read(dev->dev_stat_values + index);
}

static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
						int index)
{
	int ret;

	ret = atomic_xchg(dev->dev_stat_values + index, 0);
	/*
	 * atomic_xchg implies a full memory barrier as per atomic_t.txt:
	 * - RMW operations that have a return value are fully ordered;
	 *
	 * This implicit memory barrier is paired with the smp_rmb in
	 * btrfs_run_dev_stats().
	 */
	atomic_inc(&dev->dev_stats_ccnt);
	return ret;
}

static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
				      int index, unsigned long val)
{
	atomic_set(dev->dev_stat_values + index, val);
	/*
	 * This memory barrier orders stores updating statistics before stores
	 * updating dev_stats_ccnt.
	 *
	 * It pairs with smp_rmb() in btrfs_run_dev_stats().
	 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}

static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
					int index)
{
	btrfs_dev_stat_set(dev, index, 0);
}

/*
 * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
 * can be used as an index to access btrfs_raid_array[].
 */
static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
{
	if (flags & BTRFS_BLOCK_GROUP_RAID10)
		return BTRFS_RAID_RAID10;
	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
		return BTRFS_RAID_RAID1;
	else if (flags & BTRFS_BLOCK_GROUP_DUP)
		return BTRFS_RAID_DUP;
	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return BTRFS_RAID_RAID0;
	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
		return BTRFS_RAID_RAID5;
	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
		return BTRFS_RAID_RAID6;

	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}

const char *get_raid_name(enum btrfs_raid_types type);

void btrfs_commit_device_sizes(struct btrfs_transaction *trans);

struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev);

int btrfs_bg_type_to_factor(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);

#endif