1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #ifndef BTRFS_VOLUMES_H 7 #define BTRFS_VOLUMES_H 8 9 #include <linux/bio.h> 10 #include <linux/sort.h> 11 #include <linux/btrfs.h> 12 #include "async-thread.h" 13 14 #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) 15 16 extern struct mutex uuid_mutex; 17 18 #define BTRFS_STRIPE_LEN SZ_64K 19 20 struct btrfs_io_geometry { 21 /* remaining bytes before crossing a stripe */ 22 u64 len; 23 /* offset of logical address in chunk */ 24 u64 offset; 25 /* length of single IO stripe */ 26 u64 stripe_len; 27 /* number of stripe where address falls */ 28 u64 stripe_nr; 29 /* offset of address in stripe */ 30 u64 stripe_offset; 31 /* offset of raid56 stripe into the chunk */ 32 u64 raid56_stripe_offset; 33 }; 34 35 /* 36 * Use sequence counter to get consistent device stat data on 37 * 32-bit processors. 38 */ 39 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 40 #include <linux/seqlock.h> 41 #define __BTRFS_NEED_DEVICE_DATA_ORDERED 42 #define btrfs_device_data_ordered_init(device) \ 43 seqcount_init(&device->data_seqcount) 44 #else 45 #define btrfs_device_data_ordered_init(device) do { } while (0) 46 #endif 47 48 #define BTRFS_DEV_STATE_WRITEABLE (0) 49 #define BTRFS_DEV_STATE_IN_FS_METADATA (1) 50 #define BTRFS_DEV_STATE_MISSING (2) 51 #define BTRFS_DEV_STATE_REPLACE_TGT (3) 52 #define BTRFS_DEV_STATE_FLUSH_SENT (4) 53 54 struct btrfs_device { 55 struct list_head dev_list; /* device_list_mutex */ 56 struct list_head dev_alloc_list; /* chunk mutex */ 57 struct list_head post_commit_list; /* chunk mutex */ 58 struct btrfs_fs_devices *fs_devices; 59 struct btrfs_fs_info *fs_info; 60 61 struct rcu_string *name; 62 63 u64 generation; 64 65 struct block_device *bdev; 66 67 /* the mode sent to blkdev_get */ 68 fmode_t mode; 69 70 unsigned long dev_state; 71 blk_status_t last_flush_error; 72 73 #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED 74 seqcount_t data_seqcount; 75 #endif 76 77 /* the internal btrfs device id */ 78 u64 devid; 79 80 /* size of the device in memory */ 81 u64 total_bytes; 82 83 /* size of the device on disk */ 84 u64 disk_total_bytes; 85 86 /* bytes used */ 87 u64 bytes_used; 88 89 /* optimal io alignment for this device */ 90 u32 io_align; 91 92 /* optimal io width for this device */ 93 u32 io_width; 94 /* type and info about this device */ 95 u64 type; 96 97 /* minimal io size for this device */ 98 u32 sector_size; 99 100 /* physical drive uuid (or lvm uuid) */ 101 u8 uuid[BTRFS_UUID_SIZE]; 102 103 /* 104 * size of the device on the current transaction 105 * 106 * This variant is update when committing the transaction, 107 * and protected by chunk mutex 108 */ 109 u64 commit_total_bytes; 110 111 /* bytes used on the current transaction */ 112 u64 commit_bytes_used; 113 114 /* for sending down flush barriers */ 115 struct bio *flush_bio; 116 struct completion flush_wait; 117 118 /* per-device scrub information */ 119 struct scrub_ctx *scrub_ctx; 120 121 /* readahead state */ 122 atomic_t reada_in_flight; 123 u64 reada_next; 124 struct reada_zone *reada_curr_zone; 125 struct radix_tree_root reada_zones; 126 struct radix_tree_root reada_extents; 127 128 /* disk I/O failure stats. For detailed description refer to 129 * enum btrfs_dev_stat_values in ioctl.h */ 130 int dev_stats_valid; 131 132 /* Counter to record the change of device stats */ 133 atomic_t dev_stats_ccnt; 134 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; 135 136 struct extent_io_tree alloc_state; 137 138 struct completion kobj_unregister; 139 /* For sysfs/FSID/devinfo/devid/ */ 140 struct kobject devid_kobj; 141 }; 142 143 /* 144 * If we read those variants at the context of their own lock, we needn't 145 * use the following helpers, reading them directly is safe. 146 */ 147 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 148 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 149 static inline u64 \ 150 btrfs_device_get_##name(const struct btrfs_device *dev) \ 151 { \ 152 u64 size; \ 153 unsigned int seq; \ 154 \ 155 do { \ 156 seq = read_seqcount_begin(&dev->data_seqcount); \ 157 size = dev->name; \ 158 } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ 159 return size; \ 160 } \ 161 \ 162 static inline void \ 163 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 164 { \ 165 preempt_disable(); \ 166 write_seqcount_begin(&dev->data_seqcount); \ 167 dev->name = size; \ 168 write_seqcount_end(&dev->data_seqcount); \ 169 preempt_enable(); \ 170 } 171 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) 172 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 173 static inline u64 \ 174 btrfs_device_get_##name(const struct btrfs_device *dev) \ 175 { \ 176 u64 size; \ 177 \ 178 preempt_disable(); \ 179 size = dev->name; \ 180 preempt_enable(); \ 181 return size; \ 182 } \ 183 \ 184 static inline void \ 185 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 186 { \ 187 preempt_disable(); \ 188 dev->name = size; \ 189 preempt_enable(); \ 190 } 191 #else 192 #define BTRFS_DEVICE_GETSET_FUNCS(name) \ 193 static inline u64 \ 194 btrfs_device_get_##name(const struct btrfs_device *dev) \ 195 { \ 196 return dev->name; \ 197 } \ 198 \ 199 static inline void \ 200 btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ 201 { \ 202 dev->name = size; \ 203 } 204 #endif 205 206 BTRFS_DEVICE_GETSET_FUNCS(total_bytes); 207 BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); 208 BTRFS_DEVICE_GETSET_FUNCS(bytes_used); 209 210 enum btrfs_chunk_allocation_policy { 211 BTRFS_CHUNK_ALLOC_REGULAR, 212 }; 213 214 struct btrfs_fs_devices { 215 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ 216 u8 metadata_uuid[BTRFS_FSID_SIZE]; 217 bool fsid_change; 218 struct list_head fs_list; 219 220 u64 num_devices; 221 u64 open_devices; 222 u64 rw_devices; 223 u64 missing_devices; 224 u64 total_rw_bytes; 225 u64 total_devices; 226 227 /* Highest generation number of seen devices */ 228 u64 latest_generation; 229 230 struct block_device *latest_bdev; 231 232 /* all of the devices in the FS, protected by a mutex 233 * so we can safely walk it to write out the supers without 234 * worrying about add/remove by the multi-device code. 235 * Scrubbing super can kick off supers writing by holding 236 * this mutex lock. 237 */ 238 struct mutex device_list_mutex; 239 240 /* List of all devices, protected by device_list_mutex */ 241 struct list_head devices; 242 243 /* 244 * Devices which can satisfy space allocation. Protected by 245 * chunk_mutex 246 */ 247 struct list_head alloc_list; 248 249 struct btrfs_fs_devices *seed; 250 bool seeding; 251 252 int opened; 253 254 /* set when we find or add a device that doesn't have the 255 * nonrot flag set 256 */ 257 bool rotating; 258 259 struct btrfs_fs_info *fs_info; 260 /* sysfs kobjects */ 261 struct kobject fsid_kobj; 262 struct kobject *devices_kobj; 263 struct kobject *devinfo_kobj; 264 struct completion kobj_unregister; 265 266 enum btrfs_chunk_allocation_policy chunk_alloc_policy; 267 }; 268 269 #define BTRFS_BIO_INLINE_CSUM_SIZE 64 270 271 #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ 272 - sizeof(struct btrfs_chunk)) \ 273 / sizeof(struct btrfs_stripe) + 1) 274 275 #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ 276 - 2 * sizeof(struct btrfs_disk_key) \ 277 - 2 * sizeof(struct btrfs_chunk)) \ 278 / sizeof(struct btrfs_stripe) + 1) 279 280 /* 281 * we need the mirror number and stripe index to be passed around 282 * the call chain while we are processing end_io (especially errors). 283 * Really, what we need is a btrfs_bio structure that has this info 284 * and is properly sized with its stripe array, but we're not there 285 * quite yet. We have our own btrfs bioset, and all of the bios 286 * we allocate are actually btrfs_io_bios. We'll cram as much of 287 * struct btrfs_bio as we can into this over time. 288 */ 289 struct btrfs_io_bio { 290 unsigned int mirror_num; 291 struct btrfs_device *device; 292 u64 logical; 293 u8 *csum; 294 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; 295 struct bvec_iter iter; 296 /* 297 * This member must come last, bio_alloc_bioset will allocate enough 298 * bytes for entire btrfs_io_bio but relies on bio being last. 299 */ 300 struct bio bio; 301 }; 302 303 static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) 304 { 305 return container_of(bio, struct btrfs_io_bio, bio); 306 } 307 308 static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio) 309 { 310 if (io_bio->csum != io_bio->csum_inline) { 311 kfree(io_bio->csum); 312 io_bio->csum = NULL; 313 } 314 } 315 316 struct btrfs_bio_stripe { 317 struct btrfs_device *dev; 318 u64 physical; 319 u64 length; /* only used for discard mappings */ 320 }; 321 322 struct btrfs_bio { 323 refcount_t refs; 324 atomic_t stripes_pending; 325 struct btrfs_fs_info *fs_info; 326 u64 map_type; /* get from map_lookup->type */ 327 bio_end_io_t *end_io; 328 struct bio *orig_bio; 329 void *private; 330 atomic_t error; 331 int max_errors; 332 int num_stripes; 333 int mirror_num; 334 int num_tgtdevs; 335 int *tgtdev_map; 336 /* 337 * logical block numbers for the start of each stripe 338 * The last one or two are p/q. These are sorted, 339 * so raid_map[0] is the start of our full stripe 340 */ 341 u64 *raid_map; 342 struct btrfs_bio_stripe stripes[]; 343 }; 344 345 struct btrfs_device_info { 346 struct btrfs_device *dev; 347 u64 dev_offset; 348 u64 max_avail; 349 u64 total_avail; 350 }; 351 352 struct btrfs_raid_attr { 353 u8 sub_stripes; /* sub_stripes info for map */ 354 u8 dev_stripes; /* stripes per dev */ 355 u8 devs_max; /* max devs to use */ 356 u8 devs_min; /* min devs needed */ 357 u8 tolerated_failures; /* max tolerated fail devs */ 358 u8 devs_increment; /* ndevs has to be a multiple of this */ 359 u8 ncopies; /* how many copies to data has */ 360 u8 nparity; /* number of stripes worth of bytes to store 361 * parity information */ 362 u8 mindev_error; /* error code if min devs requisite is unmet */ 363 const char raid_name[8]; /* name of the raid */ 364 u64 bg_flag; /* block group flag of the raid */ 365 }; 366 367 extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; 368 369 struct map_lookup { 370 u64 type; 371 int io_align; 372 int io_width; 373 u64 stripe_len; 374 int num_stripes; 375 int sub_stripes; 376 int verified_stripes; /* For mount time dev extent verification */ 377 struct btrfs_bio_stripe stripes[]; 378 }; 379 380 #define map_lookup_size(n) (sizeof(struct map_lookup) + \ 381 (sizeof(struct btrfs_bio_stripe) * (n))) 382 383 struct btrfs_balance_args; 384 struct btrfs_balance_progress; 385 struct btrfs_balance_control { 386 struct btrfs_balance_args data; 387 struct btrfs_balance_args meta; 388 struct btrfs_balance_args sys; 389 390 u64 flags; 391 392 struct btrfs_balance_progress stat; 393 }; 394 395 enum btrfs_map_op { 396 BTRFS_MAP_READ, 397 BTRFS_MAP_WRITE, 398 BTRFS_MAP_DISCARD, 399 BTRFS_MAP_GET_READ_MIRRORS, 400 }; 401 402 static inline enum btrfs_map_op btrfs_op(struct bio *bio) 403 { 404 switch (bio_op(bio)) { 405 case REQ_OP_DISCARD: 406 return BTRFS_MAP_DISCARD; 407 case REQ_OP_WRITE: 408 return BTRFS_MAP_WRITE; 409 default: 410 WARN_ON_ONCE(1); 411 fallthrough; 412 case REQ_OP_READ: 413 return BTRFS_MAP_READ; 414 } 415 } 416 417 void btrfs_get_bbio(struct btrfs_bio *bbio); 418 void btrfs_put_bbio(struct btrfs_bio *bbio); 419 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 420 u64 logical, u64 *length, 421 struct btrfs_bio **bbio_ret, int mirror_num); 422 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 423 u64 logical, u64 *length, 424 struct btrfs_bio **bbio_ret); 425 int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 426 u64 logical, u64 len, struct btrfs_io_geometry *io_geom); 427 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); 428 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 429 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); 430 void btrfs_mapping_tree_free(struct extent_map_tree *tree); 431 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 432 int mirror_num); 433 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 434 fmode_t flags, void *holder); 435 struct btrfs_device *btrfs_scan_one_device(const char *path, 436 fmode_t flags, void *holder); 437 int btrfs_forget_devices(const char *path); 438 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 439 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); 440 void btrfs_assign_next_active_device(struct btrfs_device *device, 441 struct btrfs_device *this_dev); 442 struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, 443 u64 devid, 444 const char *devpath); 445 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, 446 const u64 *devid, 447 const u8 *uuid); 448 void btrfs_free_device(struct btrfs_device *device); 449 int btrfs_rm_device(struct btrfs_fs_info *fs_info, 450 const char *device_path, u64 devid); 451 void __exit btrfs_cleanup_fs_uuids(void); 452 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 453 int btrfs_grow_device(struct btrfs_trans_handle *trans, 454 struct btrfs_device *device, u64 new_size); 455 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, 456 u64 devid, u8 *uuid, u8 *fsid, bool seed); 457 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 458 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); 459 int btrfs_balance(struct btrfs_fs_info *fs_info, 460 struct btrfs_balance_control *bctl, 461 struct btrfs_ioctl_balance_args *bargs); 462 void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf); 463 int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); 464 int btrfs_recover_balance(struct btrfs_fs_info *fs_info); 465 int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 466 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 467 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); 468 int btrfs_uuid_scan_kthread(void *data); 469 int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset); 470 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 471 u64 *start, u64 *max_avail); 472 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 473 int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, 474 struct btrfs_ioctl_get_dev_stats *stats); 475 void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); 476 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); 477 int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); 478 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); 479 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev); 480 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); 481 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 482 u64 logical, u64 len); 483 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 484 u64 logical); 485 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 486 u64 chunk_offset, u64 chunk_size); 487 int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); 488 struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, 489 u64 logical, u64 length); 490 void btrfs_release_disk_super(struct btrfs_super_block *super); 491 492 static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, 493 int index) 494 { 495 atomic_inc(dev->dev_stat_values + index); 496 /* 497 * This memory barrier orders stores updating statistics before stores 498 * updating dev_stats_ccnt. 499 * 500 * It pairs with smp_rmb() in btrfs_run_dev_stats(). 501 */ 502 smp_mb__before_atomic(); 503 atomic_inc(&dev->dev_stats_ccnt); 504 } 505 506 static inline int btrfs_dev_stat_read(struct btrfs_device *dev, 507 int index) 508 { 509 return atomic_read(dev->dev_stat_values + index); 510 } 511 512 static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, 513 int index) 514 { 515 int ret; 516 517 ret = atomic_xchg(dev->dev_stat_values + index, 0); 518 /* 519 * atomic_xchg implies a full memory barriers as per atomic_t.txt: 520 * - RMW operations that have a return value are fully ordered; 521 * 522 * This implicit memory barriers is paired with the smp_rmb in 523 * btrfs_run_dev_stats 524 */ 525 atomic_inc(&dev->dev_stats_ccnt); 526 return ret; 527 } 528 529 static inline void btrfs_dev_stat_set(struct btrfs_device *dev, 530 int index, unsigned long val) 531 { 532 atomic_set(dev->dev_stat_values + index, val); 533 /* 534 * This memory barrier orders stores updating statistics before stores 535 * updating dev_stats_ccnt. 536 * 537 * It pairs with smp_rmb() in btrfs_run_dev_stats(). 538 */ 539 smp_mb__before_atomic(); 540 atomic_inc(&dev->dev_stats_ccnt); 541 } 542 543 /* 544 * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which 545 * can be used as index to access btrfs_raid_array[]. 546 */ 547 static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags) 548 { 549 if (flags & BTRFS_BLOCK_GROUP_RAID10) 550 return BTRFS_RAID_RAID10; 551 else if (flags & BTRFS_BLOCK_GROUP_RAID1) 552 return BTRFS_RAID_RAID1; 553 else if (flags & BTRFS_BLOCK_GROUP_RAID1C3) 554 return BTRFS_RAID_RAID1C3; 555 else if (flags & BTRFS_BLOCK_GROUP_RAID1C4) 556 return BTRFS_RAID_RAID1C4; 557 else if (flags & BTRFS_BLOCK_GROUP_DUP) 558 return BTRFS_RAID_DUP; 559 else if (flags & BTRFS_BLOCK_GROUP_RAID0) 560 return BTRFS_RAID_RAID0; 561 else if (flags & BTRFS_BLOCK_GROUP_RAID5) 562 return BTRFS_RAID_RAID5; 563 else if (flags & BTRFS_BLOCK_GROUP_RAID6) 564 return BTRFS_RAID_RAID6; 565 566 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 567 } 568 569 void btrfs_commit_device_sizes(struct btrfs_transaction *trans); 570 571 struct list_head * __attribute_const__ btrfs_get_fs_uuids(void); 572 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); 573 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); 574 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, 575 struct btrfs_device *failing_dev); 576 577 int btrfs_bg_type_to_factor(u64 flags); 578 const char *btrfs_bg_type_to_raid_name(u64 flags); 579 int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); 580 581 #endif 582