Lines Matching: i-cache-block-size

1 // SPDX-License-Identifier: GPL-2.0
12 #include "rcu-string.h"
13 #include "disk-io.h"
14 #include "block-group.h"
16 #include "dev-replace.h"
17 #include "space-info.h"
26 #define WP_MISSING_DEV ((u64)-1)
28 #define WP_CONVENTIONAL ((u64)-2)
33 * - primary superblock: 0B (zone 0)
34 * - first copy: 512G (zone starting at that offset)
35 * - second copy: 4T (zone starting at that offset)
50 * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
51 * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
52 * - 1 zone for tree-log dedicated block group
53 * - 1 zone for relocation
58 * Minimum / maximum supported zone size. Currently, SMR disks have a zone
59 * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range.
60 * We do not expect the zone size to become larger than 8GiB or smaller than
73 return (zone->cond == BLK_ZONE_COND_FULL) || in sb_zone_is_full()
74 (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); in sb_zone_is_full()
92 int i; in sb_write_pointer() local
94 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in sb_write_pointer()
95 ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL); in sb_write_pointer()
96 empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY); in sb_write_pointer()
97 full[i] = sb_zone_is_full(&zones[i]); in sb_write_pointer()
120 return -ENOENT; in sb_write_pointer()
123 struct address_space *mapping = bdev->bd_inode->i_mapping; in sb_write_pointer()
126 int i; in sb_write_pointer() local
128 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in sb_write_pointer()
129 u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT; in sb_write_pointer()
130 u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - in sb_write_pointer()
133 page[i] = read_cache_page_gfp(mapping, in sb_write_pointer()
135 if (IS_ERR(page[i])) { in sb_write_pointer()
136 if (i == 1) in sb_write_pointer()
138 return PTR_ERR(page[i]); in sb_write_pointer()
140 super[i] = page_address(page[i]); in sb_write_pointer()
149 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) in sb_write_pointer()
150 btrfs_release_disk_super(super[i]); in sb_write_pointer()
156 return -EUCLEAN; in sb_write_pointer()
172 case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; in sb_zone_number()
173 case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; in sb_zone_number()
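A minimal standalone sketch (not kernel code) of the mirror-to-zone mapping used by sb_zone_number() above: the shift constants assume the 512 GiB and 4 TiB superblock copy offsets from the comment block near the top, and the helper name, main() and the 256 MiB example zone size are made up for illustration.

/* sketch: map a superblock mirror index to the zone holding its log */
#include <stdio.h>
#include <stdint.h>

#define SB_LOG_FIRST_SHIFT	39	/* 512 GiB == 2^39 bytes */
#define SB_LOG_SECOND_SHIFT	42	/* 4 TiB   == 2^42 bytes */

static uint64_t sb_zone_number_sketch(int zone_size_shift, int mirror)
{
	switch (mirror) {
	case 0: return 0;	/* primary superblock lives in zone 0 */
	case 1: return 1ULL << (SB_LOG_FIRST_SHIFT - zone_size_shift);
	case 2: return 1ULL << (SB_LOG_SECOND_SHIFT - zone_size_shift);
	}
	return 0;
}

int main(void)
{
	/* Example: 256 MiB zones, so zone_size_shift == 28. */
	for (int mirror = 0; mirror < 3; mirror++)
		printf("mirror %d -> zone %llu\n", mirror,
		       (unsigned long long)sb_zone_number_sketch(28, mirror));
	return 0;
}

With 256 MiB zones this prints zones 0, 2048 and 16384, i.e. the zones starting at 0 B, 512 GiB and 4 TiB.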
190 return (u64)zone_number << zone_info->zone_size_shift; in zone_start_physical()
194 * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
201 const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; in emulate_report_zones()
202 sector_t bdev_size = bdev_nr_sectors(device->bdev); in emulate_report_zones()
203 unsigned int i; in emulate_report_zones() local
206 for (i = 0; i < nr_zones; i++) { in emulate_report_zones()
207 zones[i].start = i * zone_sectors + pos; in emulate_report_zones()
208 zones[i].len = zone_sectors; in emulate_report_zones()
209 zones[i].capacity = zone_sectors; in emulate_report_zones()
210 zones[i].wp = zones[i].start + zone_sectors; in emulate_report_zones()
211 zones[i].type = BLK_ZONE_TYPE_CONVENTIONAL; in emulate_report_zones()
212 zones[i].cond = BLK_ZONE_COND_NOT_WP; in emulate_report_zones()
214 if (zones[i].wp >= bdev_size) { in emulate_report_zones()
215 i++; in emulate_report_zones()
220 return i; in emulate_report_zones()
226 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_get_dev_zones()
232 if (!bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zones()
238 /* Check cache */ in btrfs_get_dev_zones()
239 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
240 unsigned int i; in btrfs_get_dev_zones() local
243 ASSERT(IS_ALIGNED(pos, zinfo->zone_size)); in btrfs_get_dev_zones()
244 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
249 *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno); in btrfs_get_dev_zones()
251 for (i = 0; i < *nr_zones; i++) { in btrfs_get_dev_zones()
254 zone_info = &zinfo->zone_cache[zno + i]; in btrfs_get_dev_zones()
255 if (!zone_info->len) in btrfs_get_dev_zones()
259 if (i == *nr_zones) { in btrfs_get_dev_zones()
260 /* Cache hit on all the zones */ in btrfs_get_dev_zones()
261 memcpy(zones, zinfo->zone_cache + zno, in btrfs_get_dev_zones()
262 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
267 ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, in btrfs_get_dev_zones()
270 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zones()
272 pos, rcu_str_deref(device->name), in btrfs_get_dev_zones()
273 device->devid); in btrfs_get_dev_zones()
278 return -EIO; in btrfs_get_dev_zones()
280 /* Populate cache */ in btrfs_get_dev_zones()
281 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
282 u32 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
284 memcpy(zinfo->zone_cache + zno, zones, in btrfs_get_dev_zones()
285 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
291 /* The emulated zone size is determined from the size of a device extent */
295 struct btrfs_root *root = fs_info->dev_root; in calculate_emulated_zone_size()
307 return -ENOMEM; in calculate_emulated_zone_size()
313 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { in calculate_emulated_zone_size()
319 ret = -EUCLEAN; in calculate_emulated_zone_size()
324 leaf = path->nodes[0]; in calculate_emulated_zone_size()
325 dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); in calculate_emulated_zone_size()
326 fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); in calculate_emulated_zone_size()
337 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_get_dev_zone_info_all_devices()
341 /* fs_info->zone_size might not be set yet. Use the incompat flag here. */ in btrfs_get_dev_zone_info_all_devices()
345 mutex_lock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
346 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_get_dev_zone_info_all_devices()
348 if (!device->bdev) in btrfs_get_dev_zone_info_all_devices()
355 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
362 struct btrfs_fs_info *fs_info = device->fs_info; in btrfs_get_dev_zone_info()
364 struct block_device *bdev = device->bdev; in btrfs_get_dev_zone_info()
370 unsigned int i, nreported = 0, nr_zones; in btrfs_get_dev_zone_info() local
382 if (device->zone_info) in btrfs_get_dev_zone_info()
387 return -ENOMEM; in btrfs_get_dev_zone_info()
389 device->zone_info = zone_info; in btrfs_get_dev_zone_info()
392 if (!fs_info->zone_size) { in btrfs_get_dev_zone_info()
398 ASSERT(fs_info->zone_size); in btrfs_get_dev_zone_info()
399 zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_get_dev_zone_info()
405 zone_info->zone_size = zone_sectors << SECTOR_SHIFT; in btrfs_get_dev_zone_info()
407 /* We reject devices with a zone size larger than 8GiB */ in btrfs_get_dev_zone_info()
408 if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { in btrfs_get_dev_zone_info()
410 "zoned: %s: zone size %llu larger than supported maximum %llu", in btrfs_get_dev_zone_info()
411 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
412 zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); in btrfs_get_dev_zone_info()
413 ret = -EINVAL; in btrfs_get_dev_zone_info()
415 } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { in btrfs_get_dev_zone_info()
417 "zoned: %s: zone size %llu smaller than supported minimum %u", in btrfs_get_dev_zone_info()
418 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
419 zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); in btrfs_get_dev_zone_info()
420 ret = -EINVAL; in btrfs_get_dev_zone_info()
425 zone_info->zone_size_shift = ilog2(zone_info->zone_size); in btrfs_get_dev_zone_info()
426 zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); in btrfs_get_dev_zone_info()
428 zone_info->nr_zones++; in btrfs_get_dev_zone_info()
434 rcu_str_deref(device->name), max_active_zones, in btrfs_get_dev_zone_info()
436 ret = -EINVAL; in btrfs_get_dev_zone_info()
439 zone_info->max_active_zones = max_active_zones; in btrfs_get_dev_zone_info()
441 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
442 if (!zone_info->seq_zones) { in btrfs_get_dev_zone_info()
443 ret = -ENOMEM; in btrfs_get_dev_zone_info()
447 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
448 if (!zone_info->empty_zones) { in btrfs_get_dev_zone_info()
449 ret = -ENOMEM; in btrfs_get_dev_zone_info()
453 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
454 if (!zone_info->active_zones) { in btrfs_get_dev_zone_info()
455 ret = -ENOMEM; in btrfs_get_dev_zone_info()
461 ret = -ENOMEM; in btrfs_get_dev_zone_info()
466 * Enable zone cache only for a zoned device. On a non-zoned device, we in btrfs_get_dev_zone_info()
468 * use the cache. in btrfs_get_dev_zone_info()
470 if (populate_cache && bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zone_info()
471 zone_info->zone_cache = vcalloc(zone_info->nr_zones, in btrfs_get_dev_zone_info()
473 if (!zone_info->zone_cache) { in btrfs_get_dev_zone_info()
474 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
475 "zoned: failed to allocate zone cache for %s", in btrfs_get_dev_zone_info()
476 rcu_str_deref(device->name)); in btrfs_get_dev_zone_info()
477 ret = -ENOMEM; in btrfs_get_dev_zone_info()
491 for (i = 0; i < nr_zones; i++) { in btrfs_get_dev_zone_info()
492 if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) in btrfs_get_dev_zone_info()
493 __set_bit(nreported, zone_info->seq_zones); in btrfs_get_dev_zone_info()
494 switch (zones[i].cond) { in btrfs_get_dev_zone_info()
496 __set_bit(nreported, zone_info->empty_zones); in btrfs_get_dev_zone_info()
501 __set_bit(nreported, zone_info->active_zones); in btrfs_get_dev_zone_info()
507 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; in btrfs_get_dev_zone_info()
510 if (nreported != zone_info->nr_zones) { in btrfs_get_dev_zone_info()
511 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
513 rcu_str_deref(device->name), nreported, in btrfs_get_dev_zone_info()
514 zone_info->nr_zones); in btrfs_get_dev_zone_info()
515 ret = -EIO; in btrfs_get_dev_zone_info()
521 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
523 nactive, rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
525 ret = -EIO; in btrfs_get_dev_zone_info()
528 atomic_set(&zone_info->active_zones_left, in btrfs_get_dev_zone_info()
529 max_active_zones - nactive); in btrfs_get_dev_zone_info()
530 set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); in btrfs_get_dev_zone_info()
535 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { in btrfs_get_dev_zone_info()
538 int sb_pos = BTRFS_NR_SB_LOG_ZONES * i; in btrfs_get_dev_zone_info()
540 sb_zone = sb_zone_number(zone_info->zone_size_shift, i); in btrfs_get_dev_zone_info()
541 if (sb_zone + 1 >= zone_info->nr_zones) in btrfs_get_dev_zone_info()
546 &zone_info->sb_zones[sb_pos], in btrfs_get_dev_zone_info()
552 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
553 "zoned: failed to read super block log zone info at devid %llu zone %u", in btrfs_get_dev_zone_info()
554 device->devid, sb_zone); in btrfs_get_dev_zone_info()
555 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
563 if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == in btrfs_get_dev_zone_info()
567 ret = sb_write_pointer(device->bdev, in btrfs_get_dev_zone_info()
568 &zone_info->sb_zones[sb_pos], &sb_wp); in btrfs_get_dev_zone_info()
569 if (ret != -ENOENT && ret) { in btrfs_get_dev_zone_info()
570 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
571 "zoned: super block log zone corrupted devid %llu zone %u", in btrfs_get_dev_zone_info()
572 device->devid, sb_zone); in btrfs_get_dev_zone_info()
573 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
583 model = "host-managed zoned"; in btrfs_get_dev_zone_info()
587 model = "host-aware zoned"; in btrfs_get_dev_zone_info()
598 rcu_str_deref(device->name)); in btrfs_get_dev_zone_info()
599 ret = -EOPNOTSUPP; in btrfs_get_dev_zone_info()
604 "%s block device %s, %u %szones of %llu bytes", in btrfs_get_dev_zone_info()
605 model, rcu_str_deref(device->name), zone_info->nr_zones, in btrfs_get_dev_zone_info()
606 emulated, zone_info->zone_size); in btrfs_get_dev_zone_info()
620 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_destroy_dev_zone_info()
625 bitmap_free(zone_info->active_zones); in btrfs_destroy_dev_zone_info()
626 bitmap_free(zone_info->seq_zones); in btrfs_destroy_dev_zone_info()
627 bitmap_free(zone_info->empty_zones); in btrfs_destroy_dev_zone_info()
628 vfree(zone_info->zone_cache); in btrfs_destroy_dev_zone_info()
630 device->zone_info = NULL; in btrfs_destroy_dev_zone_info()
637 zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); in btrfs_clone_dev_zone_info()
641 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
642 if (!zone_info->seq_zones) in btrfs_clone_dev_zone_info()
645 bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, in btrfs_clone_dev_zone_info()
646 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
648 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
649 if (!zone_info->empty_zones) in btrfs_clone_dev_zone_info()
652 bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, in btrfs_clone_dev_zone_info()
653 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
655 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
656 if (!zone_info->active_zones) in btrfs_clone_dev_zone_info()
659 bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, in btrfs_clone_dev_zone_info()
660 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
661 zone_info->zone_cache = NULL; in btrfs_clone_dev_zone_info()
666 bitmap_free(zone_info->seq_zones); in btrfs_clone_dev_zone_info()
667 bitmap_free(zone_info->empty_zones); in btrfs_clone_dev_zone_info()
668 bitmap_free(zone_info->active_zones); in btrfs_clone_dev_zone_info()
681 return ret ? ret : -EIO; in btrfs_get_dev_zone()
690 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_for_zoned_device()
691 if (device->bdev && in btrfs_check_for_zoned_device()
692 bdev_zoned_model(device->bdev) == BLK_ZONED_HM) { in btrfs_check_for_zoned_device()
695 device->bdev); in btrfs_check_for_zoned_device()
696 return -EINVAL; in btrfs_check_for_zoned_device()
705 struct queue_limits *lim = &fs_info->limits; in btrfs_check_zoned_mode()
711 * Host-Managed devices can't be used without the ZONED flag. With the in btrfs_check_zoned_mode()
719 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_zoned_mode()
720 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_check_zoned_mode()
722 if (!device->bdev) in btrfs_check_zoned_mode()
726 zone_size = zone_info->zone_size; in btrfs_check_zoned_mode()
727 } else if (zone_info->zone_size != zone_size) { in btrfs_check_zoned_mode()
729 "zoned: unequal block device zone sizes: have %llu found %llu", in btrfs_check_zoned_mode()
730 zone_info->zone_size, zone_size); in btrfs_check_zoned_mode()
731 return -EINVAL; in btrfs_check_zoned_mode()
735 * With the zoned emulation, we can have a non-zoned device on the in btrfs_check_zoned_mode()
737 * append size. in btrfs_check_zoned_mode()
739 if (bdev_is_zoned(device->bdev)) { in btrfs_check_zoned_mode()
741 &bdev_get_queue(device->bdev)->limits, in btrfs_check_zoned_mode()
753 "zoned: zone size %llu not aligned to stripe %u", in btrfs_check_zoned_mode()
755 return -EINVAL; in btrfs_check_zoned_mode()
759 btrfs_err(fs_info, "zoned: mixed block groups not supported"); in btrfs_check_zoned_mode()
760 return -EINVAL; in btrfs_check_zoned_mode()
763 fs_info->zone_size = zone_size; in btrfs_check_zoned_mode()
771 fs_info->max_zone_append_size = ALIGN_DOWN( in btrfs_check_zoned_mode()
772 min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
773 (u64)lim->max_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
774 (u64)lim->max_segments << PAGE_SHIFT), in btrfs_check_zoned_mode()
775 fs_info->sectorsize); in btrfs_check_zoned_mode()
776 fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; in btrfs_check_zoned_mode()
777 if (fs_info->max_zone_append_size < fs_info->max_extent_size) in btrfs_check_zoned_mode()
778 fs_info->max_extent_size = fs_info->max_zone_append_size; in btrfs_check_zoned_mode()
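A standalone sketch of how fs_info->max_zone_append_size is derived above from the queue limits: the three limit values below are made-up example numbers, and ALIGN_DOWN here assumes a power-of-two alignment just like the kernel macro.

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT	9
#define PAGE_SHIFT	12
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))	/* power-of-two 'a' only */

static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	uint64_t max_zone_append_sectors = 1024;	/* example: 512 KiB */
	uint64_t max_sectors = 2560;			/* example: 1280 KiB */
	uint64_t max_segments = 128;			/* example */
	uint64_t sectorsize = 4096;

	uint64_t limit = min3_u64(max_zone_append_sectors << SECTOR_SHIFT,
				  max_sectors << SECTOR_SHIFT,
				  max_segments << PAGE_SHIFT);

	printf("max_zone_append_size = %llu\n",
	       (unsigned long long)ALIGN_DOWN(limit, sectorsize));
	return 0;
}

Here the segment limit (128 pages of 4 KiB) and the zone append limit both cap the value at 512 KiB.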
781 * Check mount options here, because we might change fs_info->zoned in btrfs_check_zoned_mode()
782 * from fs_info->zone_size. in btrfs_check_zoned_mode()
788 btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); in btrfs_check_zoned_mode()
798 * Space cache writing is not COWed. Disable that to avoid write errors in btrfs_check_mountopts_zoned()
802 btrfs_err(info, "zoned: space cache v1 is not supported"); in btrfs_check_mountopts_zoned()
803 return -EINVAL; in btrfs_check_mountopts_zoned()
808 return -EINVAL; in btrfs_check_mountopts_zoned()
829 if (ret != -ENOENT && ret < 0) in sb_log_location()
840 if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { in sb_log_location()
844 reset->start, reset->len, in sb_log_location()
849 reset->cond = BLK_ZONE_COND_EMPTY; in sb_log_location()
850 reset->wp = reset->start; in sb_log_location()
852 } else if (ret != -ENOENT) { in sb_log_location()
867 wp -= BTRFS_SUPER_INFO_SIZE; in sb_log_location()
895 return -EINVAL; in btrfs_sb_log_location_bdev()
902 return -ENOENT; in btrfs_sb_log_location_bdev()
910 return -EIO; in btrfs_sb_log_location_bdev()
918 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_sb_log_location()
922 * For a zoned filesystem on a non-zoned block device, use the same in btrfs_sb_log_location()
923 * super block locations as regular filesystem. Doing so, the super in btrfs_sb_log_location()
924 * block can always be retrieved and the zoned flag of the volume in btrfs_sb_log_location()
925 * detected from the super block information. in btrfs_sb_log_location()
927 if (!bdev_is_zoned(device->bdev)) { in btrfs_sb_log_location()
932 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in btrfs_sb_log_location()
933 if (zone_num + 1 >= zinfo->nr_zones) in btrfs_sb_log_location()
934 return -ENOENT; in btrfs_sb_log_location()
936 return sb_log_location(device->bdev, in btrfs_sb_log_location()
937 &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], in btrfs_sb_log_location()
949 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in is_sb_log_zone()
950 if (zone_num + 1 >= zinfo->nr_zones) in is_sb_log_zone()
953 if (!test_bit(zone_num, zinfo->seq_zones)) in is_sb_log_zone()
961 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_advance_sb_log()
963 int i; in btrfs_advance_sb_log() local
968 zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; in btrfs_advance_sb_log()
969 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in btrfs_advance_sb_log()
971 if (zone->cond == BLK_ZONE_COND_FULL) { in btrfs_advance_sb_log()
976 if (zone->cond == BLK_ZONE_COND_EMPTY) in btrfs_advance_sb_log()
977 zone->cond = BLK_ZONE_COND_IMP_OPEN; in btrfs_advance_sb_log()
979 zone->wp += SUPER_INFO_SECTORS; in btrfs_advance_sb_log()
990 if (zone->wp != zone->start + zone->capacity) { in btrfs_advance_sb_log()
993 ret = blkdev_zone_mgmt(device->bdev, in btrfs_advance_sb_log()
994 REQ_OP_ZONE_FINISH, zone->start, in btrfs_advance_sb_log()
995 zone->len, GFP_NOFS); in btrfs_advance_sb_log()
1000 zone->wp = zone->start + zone->len; in btrfs_advance_sb_log()
1001 zone->cond = BLK_ZONE_COND_FULL; in btrfs_advance_sb_log()
1008 return -EIO; in btrfs_advance_sb_log()
1026 return -ENOENT; in btrfs_reset_sb_log_zones()
1038 * @num_bytes: size of wanted region
1047 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_find_allocatable_zones()
1048 const u8 shift = zinfo->zone_size_shift; in btrfs_find_allocatable_zones()
1053 int i; in btrfs_find_allocatable_zones() local
1055 ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1056 ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1062 if (end > zinfo->nr_zones) in btrfs_find_allocatable_zones()
1067 !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) { in btrfs_find_allocatable_zones()
1068 pos += zinfo->zone_size; in btrfs_find_allocatable_zones()
1073 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { in btrfs_find_allocatable_zones()
1077 sb_zone = sb_zone_number(shift, i); in btrfs_find_allocatable_zones()
1087 sb_pos = btrfs_sb_offset(i); in btrfs_find_allocatable_zones()
1092 zinfo->zone_size); in btrfs_find_allocatable_zones()
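A simplified standalone sketch of the scan btrfs_find_allocatable_zones() performs: advance through the hole in zone_size steps until every zone covering the wanted region is empty and sequential. The bitmap, sizes and names are made up, and the real code additionally steps over the superblock log zones, which this sketch omits.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool range_all_set(const bool *bits, uint64_t begin, uint64_t n)
{
	for (uint64_t i = 0; i < n; i++)
		if (!bits[begin + i])
			return false;
	return true;
}

int main(void)
{
	const uint64_t zone_size = 1ULL << 26;	/* 64 MiB zones, example */
	/* Per-zone state of a tiny 8-zone device: true = empty sequential zone. */
	bool empty_seq[8] = { false, false, true, false, true, true, true, true };
	uint64_t hole_start = 2 * zone_size;
	uint64_t hole_end = 8 * zone_size;
	uint64_t num_bytes = 3 * zone_size;	/* wanted region */
	uint64_t nzones = num_bytes / zone_size;

	for (uint64_t pos = hole_start; pos + num_bytes <= hole_end; pos += zone_size) {
		uint64_t begin = pos / zone_size;

		if (range_all_set(empty_seq, begin, nzones)) {
			printf("allocatable region starts at zone %llu\n",
			       (unsigned long long)begin);
			return 0;
		}
	}
	printf("no allocatable region in the hole\n");
	return 0;
}

With the example bitmap the first run of three empty zones starts at zone 4.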
1105 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_set_active_zone()
1106 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_set_active_zone()
1109 if (zone_info->max_active_zones == 0) in btrfs_dev_set_active_zone()
1112 if (!test_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1114 if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) in btrfs_dev_set_active_zone()
1116 if (test_and_set_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1118 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_set_active_zone()
1127 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_clear_active_zone()
1128 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_clear_active_zone()
1131 if (zone_info->max_active_zones == 0) in btrfs_dev_clear_active_zone()
1134 if (test_and_clear_bit(zno, zone_info->active_zones)) in btrfs_dev_clear_active_zone()
1135 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_clear_active_zone()
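A standalone sketch, using C11 atomics in place of the kernel primitives, of the claim pattern in btrfs_dev_set_active_zone() above: take one slot from active_zones_left, then try to mark the zone, and give the slot back if another claimer won the race. Names and the slot count are made up.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int active_zones_left = 3;	/* example: 3 activatable zones */
static atomic_bool zone_active[8];

static bool claim_zone(unsigned int zno)
{
	if (atomic_load(&zone_active[zno]))
		return true;			/* already active, nothing to do */

	/* Roughly atomic_dec_if_positive(): refuse when no slot is left. */
	int left = atomic_load(&active_zones_left);
	do {
		if (left <= 0)
			return false;
	} while (!atomic_compare_exchange_weak(&active_zones_left, &left, left - 1));

	if (atomic_exchange(&zone_active[zno], true)) {
		/* Lost the race to another claimer: return the slot. */
		atomic_fetch_add(&active_zones_left, 1);
	}
	return true;
}

int main(void)
{
	for (unsigned int zno = 0; zno < 5; zno++)
		printf("zone %u: %s\n", zno, claim_zone(zno) ? "active" : "refused");
	return 0;
}

Only the first three zones get a slot; the remaining claims are refused until a zone is finished and its slot returned.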
1144 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_device_zone()
1154 physical += device->zone_info->zone_size; in btrfs_reset_device_zone()
1155 length -= device->zone_info->zone_size; in btrfs_reset_device_zone()
1161 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) in btrfs_ensure_empty_zones() argument
1163 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_ensure_empty_zones()
1164 const u8 shift = zinfo->zone_size_shift; in btrfs_ensure_empty_zones()
1166 unsigned long nbits = size >> shift; in btrfs_ensure_empty_zones()
1170 ASSERT(IS_ALIGNED(start, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1171 ASSERT(IS_ALIGNED(size, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1173 if (begin + nbits > zinfo->nr_zones) in btrfs_ensure_empty_zones()
1174 return -ERANGE; in btrfs_ensure_empty_zones()
1177 if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1181 if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) && in btrfs_ensure_empty_zones()
1182 bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1185 for (pos = start; pos < start + size; pos += zinfo->zone_size) { in btrfs_ensure_empty_zones()
1194 device->fs_info, in btrfs_ensure_empty_zones()
1196 rcu_str_deref(device->name), device->devid, pos >> shift); in btrfs_ensure_empty_zones()
1199 ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, in btrfs_ensure_empty_zones()
1210 * for a block group consisting of conventional zones. It points to the
1211 * end of the highest addressed extent in the block group as an allocation
1214 static int calculate_alloc_pointer(struct btrfs_block_group *cache, in calculate_alloc_pointer() argument
1217 struct btrfs_fs_info *fs_info = cache->fs_info; in calculate_alloc_pointer()
1226 * Avoid tree lookups for a new block group; there's no use for it. in calculate_alloc_pointer()
1229 * Also, we have a lock chain of extent buffer lock -> chunk mutex. in calculate_alloc_pointer()
1230 * For a new block group, this function is called from in calculate_alloc_pointer()
1242 return -ENOMEM; in calculate_alloc_pointer()
1244 key.objectid = cache->start + cache->length; in calculate_alloc_pointer()
1252 ret = -EUCLEAN; in calculate_alloc_pointer()
1256 ret = btrfs_previous_extent_item(root, path, cache->start); in calculate_alloc_pointer()
1265 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); in calculate_alloc_pointer()
1270 length = fs_info->nodesize; in calculate_alloc_pointer()
1272 if (!(found_key.objectid >= cache->start && in calculate_alloc_pointer()
1273 found_key.objectid + length <= cache->start + cache->length)) { in calculate_alloc_pointer()
1274 ret = -EUCLEAN; in calculate_alloc_pointer()
1277 *offset_ret = found_key.objectid + length - cache->start; in calculate_alloc_pointer()
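A standalone sketch of the alloc-pointer rule documented above calculate_alloc_pointer(): for a block group backed by conventional zones, the emulated write pointer is the end of the highest addressed extent, relative to the block group start. The key type values follow the btrfs on-disk format; the helper name and the example numbers are made up.

#include <stdio.h>
#include <stdint.h>

#define EXTENT_ITEM_KEY		168
#define METADATA_ITEM_KEY	169

struct key_sketch { uint64_t objectid; uint8_t type; uint64_t offset; };

static uint64_t alloc_pointer_sketch(uint64_t bg_start, uint64_t nodesize,
				     const struct key_sketch *last_extent)
{
	/* METADATA_ITEM keys encode the level in the offset, so use nodesize. */
	uint64_t length = (last_extent->type == METADATA_ITEM_KEY) ?
			  nodesize : last_extent->offset;

	return last_extent->objectid + length - bg_start;
}

int main(void)
{
	const uint64_t MiB = 1048576;
	/* Highest extent: 1 MiB of data starting 4 MiB into a 1 GiB block group. */
	struct key_sketch last = {
		.objectid = 1024 * MiB + 4 * MiB,
		.type = EXTENT_ITEM_KEY,
		.offset = 1 * MiB,
	};

	printf("alloc offset = %llu MiB\n", (unsigned long long)
	       (alloc_pointer_sketch(1024 * MiB, 16384, &last) / MiB));
	return 0;
}

The emulated write pointer ends up 5 MiB into the block group, just past the last extent.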
1295 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in btrfs_load_zone_info()
1302 info->physical = map->stripes[zone_idx].physical; in btrfs_load_zone_info()
1304 down_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1305 device = map->stripes[zone_idx].dev; in btrfs_load_zone_info()
1307 if (!device->bdev) { in btrfs_load_zone_info()
1308 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1309 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1314 if (!device->zone_info->max_active_zones) in btrfs_load_zone_info()
1317 if (!btrfs_dev_is_sequential(device, info->physical)) { in btrfs_load_zone_info()
1318 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1319 info->alloc_offset = WP_CONVENTIONAL; in btrfs_load_zone_info()
1323 /* This zone will be used for allocation, so mark this zone non-empty. */ in btrfs_load_zone_info()
1324 btrfs_dev_clear_zone_empty(device, info->physical); in btrfs_load_zone_info()
1327 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) in btrfs_load_zone_info()
1328 btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); in btrfs_load_zone_info()
1334 WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); in btrfs_load_zone_info()
1336 ret = btrfs_get_dev_zone(device, info->physical, &zone); in btrfs_load_zone_info()
1339 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1340 if (ret != -EIO && ret != -EOPNOTSUPP) in btrfs_load_zone_info()
1342 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1349 zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), in btrfs_load_zone_info()
1350 device->devid); in btrfs_load_zone_info()
1351 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1352 return -EIO; in btrfs_load_zone_info()
1355 info->capacity = (zone.capacity << SECTOR_SHIFT); in btrfs_load_zone_info()
1362 (info->physical >> device->zone_info->zone_size_shift), in btrfs_load_zone_info()
1363 rcu_str_deref(device->name), device->devid); in btrfs_load_zone_info()
1364 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1367 info->alloc_offset = 0; in btrfs_load_zone_info()
1370 info->alloc_offset = info->capacity; in btrfs_load_zone_info()
1374 info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_load_zone_info()
1379 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
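A standalone sketch of the unit conversion btrfs_load_zone_info() does above: a reported zone's sector-granular fields become a byte-granular capacity and alloc_offset. The struct and the report values are made-up examples, not the real struct blk_zone.

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

struct zone_report_sketch { uint64_t start, wp, capacity; };	/* 512 B sectors */

int main(void)
{
	struct zone_report_sketch zone = {
		.start = 524288,	/* zone begins at 256 MiB */
		.wp = 528384,		/* write pointer 2 MiB into the zone */
		.capacity = 491520,	/* 240 MiB usable of a 256 MiB zone */
	};

	uint64_t capacity = zone.capacity << SECTOR_SHIFT;
	uint64_t alloc_offset = (zone.wp - zone.start) << SECTOR_SHIFT;

	printf("capacity %llu bytes, alloc_offset %llu bytes\n",
	       (unsigned long long)capacity, (unsigned long long)alloc_offset);
	return 0;
}

An empty zone would give alloc_offset 0 and a full one gives alloc_offset equal to the capacity, matching the switch on zone.cond above.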
1388 if (info->alloc_offset == WP_MISSING_DEV) { in btrfs_load_block_group_single()
1389 btrfs_err(bg->fs_info, in btrfs_load_block_group_single()
1391 info->physical); in btrfs_load_block_group_single()
1392 return -EIO; in btrfs_load_block_group_single()
1395 bg->alloc_offset = info->alloc_offset; in btrfs_load_block_group_single()
1396 bg->zone_capacity = info->capacity; in btrfs_load_block_group_single()
1398 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_single()
1407 if (map->type & BTRFS_BLOCK_GROUP_DATA) { in btrfs_load_block_group_dup()
1408 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1410 return -EINVAL; in btrfs_load_block_group_dup()
1414 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1417 return -EIO; in btrfs_load_block_group_dup()
1420 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1423 return -EIO; in btrfs_load_block_group_dup()
1426 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1428 return -EIO; in btrfs_load_block_group_dup()
1433 return -EIO; in btrfs_load_block_group_dup()
1435 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_dup()
1438 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_dup()
1439 bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_dup()
1443 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) in btrfs_load_block_group_zone_info() argument
1445 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_load_block_group_zone_info()
1446 struct extent_map_tree *em_tree = &fs_info->mapping_tree; in btrfs_load_block_group_zone_info()
1449 u64 logical = cache->start; in btrfs_load_block_group_zone_info()
1450 u64 length = cache->length; in btrfs_load_block_group_zone_info()
1453 int i; in btrfs_load_block_group_zone_info() local
1462 if (!IS_ALIGNED(length, fs_info->zone_size)) { in btrfs_load_block_group_zone_info()
1464 "zoned: block group %llu len %llu unaligned to zone size %llu", in btrfs_load_block_group_zone_info()
1465 logical, length, fs_info->zone_size); in btrfs_load_block_group_zone_info()
1466 return -EIO; in btrfs_load_block_group_zone_info()
1470 read_lock(&em_tree->lock); in btrfs_load_block_group_zone_info()
1472 read_unlock(&em_tree->lock); in btrfs_load_block_group_zone_info()
1475 return -EINVAL; in btrfs_load_block_group_zone_info()
1477 map = em->map_lookup; in btrfs_load_block_group_zone_info()
1479 cache->physical_map = kmemdup(map, map_lookup_size(map->num_stripes), GFP_NOFS); in btrfs_load_block_group_zone_info()
1480 if (!cache->physical_map) { in btrfs_load_block_group_zone_info()
1481 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1485 zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); in btrfs_load_block_group_zone_info()
1487 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1491 active = bitmap_zalloc(map->num_stripes, GFP_NOFS); in btrfs_load_block_group_zone_info()
1493 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1497 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_zone_info()
1498 ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); in btrfs_load_block_group_zone_info()
1502 if (zone_info[i].alloc_offset == WP_CONVENTIONAL) in btrfs_load_block_group_zone_info()
1509 set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1512 /* Zone capacity is always zone size in emulation */ in btrfs_load_block_group_zone_info()
1513 cache->zone_capacity = cache->length; in btrfs_load_block_group_zone_info()
1514 ret = calculate_alloc_pointer(cache, &last_alloc, new); in btrfs_load_block_group_zone_info()
1518 cache->start); in btrfs_load_block_group_zone_info()
1520 } else if (map->num_stripes == num_conventional) { in btrfs_load_block_group_zone_info()
1521 cache->alloc_offset = last_alloc; in btrfs_load_block_group_zone_info()
1522 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1527 switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { in btrfs_load_block_group_zone_info()
1529 ret = btrfs_load_block_group_single(cache, &zone_info[0], active); in btrfs_load_block_group_zone_info()
1532 ret = btrfs_load_block_group_dup(cache, map, zone_info, active); in btrfs_load_block_group_zone_info()
1539 /* non-single profiles are not supported yet */ in btrfs_load_block_group_zone_info()
1542 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1543 ret = -EINVAL; in btrfs_load_block_group_zone_info()
1548 if (cache->alloc_offset > fs_info->zone_size) { in btrfs_load_block_group_zone_info()
1550 "zoned: invalid write pointer %llu in block group %llu", in btrfs_load_block_group_zone_info()
1551 cache->alloc_offset, cache->start); in btrfs_load_block_group_zone_info()
1552 ret = -EIO; in btrfs_load_block_group_zone_info()
1555 if (cache->alloc_offset > cache->zone_capacity) { in btrfs_load_block_group_zone_info()
1557 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", in btrfs_load_block_group_zone_info()
1558 cache->alloc_offset, cache->zone_capacity, in btrfs_load_block_group_zone_info()
1559 cache->start); in btrfs_load_block_group_zone_info()
1560 ret = -EIO; in btrfs_load_block_group_zone_info()
1564 if (!ret && num_conventional && last_alloc > cache->alloc_offset) { in btrfs_load_block_group_zone_info()
1567 logical, last_alloc, cache->alloc_offset); in btrfs_load_block_group_zone_info()
1568 ret = -EIO; in btrfs_load_block_group_zone_info()
1572 cache->meta_write_pointer = cache->alloc_offset + cache->start; in btrfs_load_block_group_zone_info()
1573 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) { in btrfs_load_block_group_zone_info()
1574 btrfs_get_block_group(cache); in btrfs_load_block_group_zone_info()
1575 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1576 list_add_tail(&cache->active_bg_list, in btrfs_load_block_group_zone_info()
1577 &fs_info->zone_active_bgs); in btrfs_load_block_group_zone_info()
1578 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1581 kfree(cache->physical_map); in btrfs_load_block_group_zone_info()
1582 cache->physical_map = NULL; in btrfs_load_block_group_zone_info()
1591 void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) in btrfs_calc_zone_unusable() argument
1595 if (!btrfs_is_zoned(cache->fs_info)) in btrfs_calc_zone_unusable()
1598 WARN_ON(cache->bytes_super != 0); in btrfs_calc_zone_unusable()
1599 unusable = (cache->alloc_offset - cache->used) + in btrfs_calc_zone_unusable()
1600 (cache->length - cache->zone_capacity); in btrfs_calc_zone_unusable()
1601 free = cache->zone_capacity - cache->alloc_offset; in btrfs_calc_zone_unusable()
1603 /* We only need ->free_space in ALLOC_SEQ block groups */ in btrfs_calc_zone_unusable()
1604 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_calc_zone_unusable()
1605 cache->free_space_ctl->free_space = free; in btrfs_calc_zone_unusable()
1606 cache->zone_unusable = unusable; in btrfs_calc_zone_unusable()
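A standalone worked example of the zone_unusable accounting in btrfs_calc_zone_unusable() above, with made-up sizes: freed-but-not-rewritable bytes behind the write pointer plus the capacity lost past zone_capacity count as unusable, and only the space between the write pointer and zone_capacity stays free.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t MiB = 1048576;
	uint64_t length = 256 * MiB;		/* block group length */
	uint64_t zone_capacity = 192 * MiB;	/* usable part of the zone */
	uint64_t alloc_offset = 128 * MiB;	/* write pointer position */
	uint64_t used = 96 * MiB;		/* still-referenced bytes */

	uint64_t unusable = (alloc_offset - used) + (length - zone_capacity);
	uint64_t free_space = zone_capacity - alloc_offset;

	printf("unusable = %llu MiB, free = %llu MiB\n",
	       (unsigned long long)(unusable / MiB),
	       (unsigned long long)(free_space / MiB));
	return 0;
}

Here 32 MiB of freed space plus the 64 MiB beyond the zone capacity are unusable until the zone is reset, and 64 MiB remain free at the write pointer.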
1612 if (!btrfs_is_zoned(eb->fs_info) || in btrfs_redirty_list_add()
1616 ASSERT(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); in btrfs_redirty_list_add()
1618 memzero_extent_buffer(eb, 0, eb->len); in btrfs_redirty_list_add()
1619 set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags); in btrfs_redirty_list_add()
1621 set_extent_bit(&trans->dirty_pages, eb->start, eb->start + eb->len - 1, in btrfs_redirty_list_add()
1627 u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT); in btrfs_use_zone_append()
1628 struct btrfs_inode *inode = bbio->inode; in btrfs_use_zone_append()
1629 struct btrfs_fs_info *fs_info = bbio->fs_info; in btrfs_use_zone_append()
1630 struct btrfs_block_group *cache; in btrfs_use_zone_append() local
1636 if (!inode || !is_data_inode(&inode->vfs_inode)) in btrfs_use_zone_append()
1639 if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) in btrfs_use_zone_append()
1645 * Furthermore we have set aside own block-group from which only the in btrfs_use_zone_append()
1650 if (btrfs_is_data_reloc_root(inode->root)) in btrfs_use_zone_append()
1653 cache = btrfs_lookup_block_group(fs_info, start); in btrfs_use_zone_append()
1654 ASSERT(cache); in btrfs_use_zone_append()
1655 if (!cache) in btrfs_use_zone_append()
1658 ret = !!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_use_zone_append()
1659 btrfs_put_block_group(cache); in btrfs_use_zone_append()
1666 const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; in btrfs_record_physical_zoned()
1667 struct btrfs_ordered_sum *sum = bbio->sums; in btrfs_record_physical_zoned()
1669 if (physical < bbio->orig_physical) in btrfs_record_physical_zoned()
1670 sum->logical -= bbio->orig_physical - physical; in btrfs_record_physical_zoned()
1672 sum->logical += physical - bbio->orig_physical; in btrfs_record_physical_zoned()
1678 struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree; in btrfs_rewrite_logical_zoned()
1681 ordered->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1683 write_lock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1684 em = search_extent_mapping(em_tree, ordered->file_offset, in btrfs_rewrite_logical_zoned()
1685 ordered->num_bytes); in btrfs_rewrite_logical_zoned()
1686 em->block_start = logical; in btrfs_rewrite_logical_zoned()
1688 write_unlock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1696 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && in btrfs_zoned_split_ordered()
1697 split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset, in btrfs_zoned_split_ordered()
1698 ordered->num_bytes, len, logical)) in btrfs_zoned_split_ordered()
1704 new->disk_bytenr = logical; in btrfs_zoned_split_ordered()
1711 struct btrfs_inode *inode = BTRFS_I(ordered->inode); in btrfs_finish_ordered_zoned()
1712 struct btrfs_fs_info *fs_info = inode->root->fs_info; in btrfs_finish_ordered_zoned()
1717 * A write to a pre-allocated region is for data relocation, and so in btrfs_finish_ordered_zoned()
1720 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) in btrfs_finish_ordered_zoned()
1723 ASSERT(!list_empty(&ordered->list)); in btrfs_finish_ordered_zoned()
1724 /* The ordered->list can be empty in the above pre-alloc case. */ in btrfs_finish_ordered_zoned()
1725 sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list); in btrfs_finish_ordered_zoned()
1726 logical = sum->logical; in btrfs_finish_ordered_zoned()
1727 len = sum->len; in btrfs_finish_ordered_zoned()
1729 while (len < ordered->disk_num_bytes) { in btrfs_finish_ordered_zoned()
1731 if (sum->logical == logical + len) { in btrfs_finish_ordered_zoned()
1732 len += sum->len; in btrfs_finish_ordered_zoned()
1736 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); in btrfs_finish_ordered_zoned()
1740 logical = sum->logical; in btrfs_finish_ordered_zoned()
1741 len = sum->len; in btrfs_finish_ordered_zoned()
1744 if (ordered->disk_bytenr != logical) in btrfs_finish_ordered_zoned()
1749 * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures in btrfs_finish_ordered_zoned()
1754 if ((inode->flags & BTRFS_INODE_NODATASUM) || in btrfs_finish_ordered_zoned()
1755 test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) { in btrfs_finish_ordered_zoned()
1756 while ((sum = list_first_entry_or_null(&ordered->list, in btrfs_finish_ordered_zoned()
1758 list_del(&sum->list); in btrfs_finish_ordered_zoned()
1767 const struct writeback_control *wbc = ctx->wbc; in check_bg_is_active()
1768 struct btrfs_block_group *block_group = ctx->zoned_bg; in check_bg_is_active()
1769 struct btrfs_fs_info *fs_info = block_group->fs_info; in check_bg_is_active()
1771 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) in check_bg_is_active()
1774 if (fs_info->treelog_bg == block_group->start) { in check_bg_is_active()
1784 /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */ in check_bg_is_active()
1785 lockdep_assert_held(&fs_info->zoned_meta_io_lock); in check_bg_is_active()
1792 if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) { in check_bg_is_active()
1793 if (wbc->sync_mode == WB_SYNC_NONE || in check_bg_is_active()
1794 (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)) in check_bg_is_active()
1798 /* Pivot active metadata/system block group. */ in check_bg_is_active()
1821 * Check if @ctx->eb is aligned to the write pointer.
1824 * 0: @ctx->eb is at the write pointer. You can write it.
1825 * -EAGAIN: There is a hole. The caller should handle the case.
1826 * -EBUSY: There is a hole, but the caller can just bail out.
1831 const struct writeback_control *wbc = ctx->wbc; in btrfs_check_meta_write_pointer()
1832 const struct extent_buffer *eb = ctx->eb; in btrfs_check_meta_write_pointer()
1833 struct btrfs_block_group *block_group = ctx->zoned_bg; in btrfs_check_meta_write_pointer()
1839 if (block_group->start > eb->start || in btrfs_check_meta_write_pointer()
1840 block_group->start + block_group->length <= eb->start) { in btrfs_check_meta_write_pointer()
1843 ctx->zoned_bg = NULL; in btrfs_check_meta_write_pointer()
1848 block_group = btrfs_lookup_block_group(fs_info, eb->start); in btrfs_check_meta_write_pointer()
1851 ctx->zoned_bg = block_group; in btrfs_check_meta_write_pointer()
1854 if (block_group->meta_write_pointer == eb->start) { in btrfs_check_meta_write_pointer()
1857 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_meta_write_pointer()
1860 if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) in btrfs_check_meta_write_pointer()
1861 tgt = &fs_info->active_system_bg; in btrfs_check_meta_write_pointer()
1863 tgt = &fs_info->active_meta_bg; in btrfs_check_meta_write_pointer()
1869 * Since we may release fs_info->zoned_meta_io_lock, someone can already in btrfs_check_meta_write_pointer()
1872 if (block_group->meta_write_pointer > eb->start) in btrfs_check_meta_write_pointer()
1873 return -EBUSY; in btrfs_check_meta_write_pointer()
1876 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) in btrfs_check_meta_write_pointer()
1877 return -EAGAIN; in btrfs_check_meta_write_pointer()
1878 return -EBUSY; in btrfs_check_meta_write_pointer()
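A standalone sketch of the decision documented above btrfs_check_meta_write_pointer(): an extent buffer is only writable when it sits exactly at the block group's metadata write pointer; otherwise the caller either bails out or retries depending on the writeback mode. The sync_all flag stands in for the wbc sync-mode test and the values are examples.

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

static int check_meta_wp_sketch(uint64_t meta_write_pointer, uint64_t eb_start,
				bool sync_all)
{
	if (meta_write_pointer == eb_start)
		return 0;	/* aligned with the write pointer: write it */
	if (meta_write_pointer > eb_start)
		return -EBUSY;	/* someone already wrote past it: bail out */
	/* A hole in front of eb_start: retry later or bail, depending on mode. */
	return sync_all ? -EAGAIN : -EBUSY;
}

int main(void)
{
	printf("aligned: %d, behind: %d, hole: %d\n",
	       check_meta_wp_sketch(4096, 4096, false),
	       check_meta_wp_sketch(8192, 4096, false),
	       check_meta_wp_sketch(0, 4096, true));
	return 0;
}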
1884 return -EOPNOTSUPP; in btrfs_zoned_issue_zeroout()
1886 return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, in btrfs_zoned_issue_zeroout()
1897 int i, ret; in read_zone_info() local
1902 ret = -EIO; in read_zone_info()
1906 if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { in read_zone_info()
1907 ret = -EINVAL; in read_zone_info()
1912 nmirrors = (int)bioc->num_stripes; in read_zone_info()
1913 for (i = 0; i < nmirrors; i++) { in read_zone_info()
1914 u64 physical = bioc->stripes[i].physical; in read_zone_info()
1915 struct btrfs_device *dev = bioc->stripes[i].dev; in read_zone_info()
1918 if (!dev->bdev) in read_zone_info()
1923 if (ret == -EIO || ret == -EOPNOTSUPP) in read_zone_info()
1935 * filling zeros from @physical_pos to the write pointer of the dev-replace
1941 struct btrfs_fs_info *fs_info = tgt_dev->fs_info; in btrfs_sync_zone_write_pointer()
1954 wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_sync_zone_write_pointer()
1960 return -EUCLEAN; in btrfs_sync_zone_write_pointer()
1962 length = wp - physical_pos; in btrfs_sync_zone_write_pointer()
1967 * Activate block group and underlying device zones
1969 * @block_group: the block group to activate
1975 struct btrfs_fs_info *fs_info = block_group->fs_info; in btrfs_zone_activate()
1979 const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA); in btrfs_zone_activate()
1981 int i; in btrfs_zone_activate() local
1983 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_activate()
1986 map = block_group->physical_map; in btrfs_zone_activate()
1988 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
1989 spin_lock(&block_group->lock); in btrfs_zone_activate()
1990 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in btrfs_zone_activate()
2001 for (i = 0; i < map->num_stripes; i++) { in btrfs_zone_activate()
2005 device = map->stripes[i].dev; in btrfs_zone_activate()
2006 physical = map->stripes[i].physical; in btrfs_zone_activate()
2007 zinfo = device->zone_info; in btrfs_zone_activate()
2009 if (zinfo->max_active_zones == 0) in btrfs_zone_activate()
2013 reserved = zinfo->reserved_active_zones; in btrfs_zone_activate()
2015 * For the data block group, leave active zones for one in btrfs_zone_activate()
2016 * metadata block group and one system block group. in btrfs_zone_activate()
2018 if (atomic_read(&zinfo->active_zones_left) <= reserved) { in btrfs_zone_activate()
2029 zinfo->reserved_active_zones--; in btrfs_zone_activate()
2033 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in btrfs_zone_activate()
2034 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2036 /* For the active block group list */ in btrfs_zone_activate()
2038 list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); in btrfs_zone_activate()
2039 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2044 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2045 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2051 struct btrfs_fs_info *fs_info = block_group->fs_info; in wait_eb_writebacks()
2052 const u64 end = block_group->start + block_group->length; in wait_eb_writebacks()
2058 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, in wait_eb_writebacks()
2059 block_group->start >> fs_info->sectorsize_bits) { in wait_eb_writebacks()
2068 if (eb->start < block_group->start) in wait_eb_writebacks()
2070 if (eb->start >= end) in wait_eb_writebacks()
2083 struct btrfs_fs_info *fs_info = block_group->fs_info; in do_zone_finish()
2085 const bool is_metadata = (block_group->flags & in do_zone_finish()
2088 int i; in do_zone_finish() local
2090 spin_lock(&block_group->lock); in do_zone_finish()
2091 if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in do_zone_finish()
2092 spin_unlock(&block_group->lock); in do_zone_finish()
2098 block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { in do_zone_finish()
2099 spin_unlock(&block_group->lock); in do_zone_finish()
2100 return -EAGAIN; in do_zone_finish()
2104 * If we are sure that the block group is full (= no more room left for in do_zone_finish()
2105 * new allocation) and the IO for the last usable block is completed, we in do_zone_finish()
2108 * and block_group->meta_write_pointer for metadata. in do_zone_finish()
2111 if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in do_zone_finish()
2112 spin_unlock(&block_group->lock); in do_zone_finish()
2113 return -EAGAIN; in do_zone_finish()
2115 spin_unlock(&block_group->lock); in do_zone_finish()
2121 /* Ensure all writes in this block group finish */ in do_zone_finish()
2124 btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, in do_zone_finish()
2125 block_group->length); in do_zone_finish()
2130 spin_lock(&block_group->lock); in do_zone_finish()
2133 * Bail out if someone already deactivated the block group, or in do_zone_finish()
2134 * allocated space is left in the block group. in do_zone_finish()
2137 &block_group->runtime_flags)) { in do_zone_finish()
2138 spin_unlock(&block_group->lock); in do_zone_finish()
2143 if (block_group->reserved || in do_zone_finish()
2145 &block_group->runtime_flags)) { in do_zone_finish()
2146 spin_unlock(&block_group->lock); in do_zone_finish()
2148 return -EAGAIN; in do_zone_finish()
2152 clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in do_zone_finish()
2153 block_group->alloc_offset = block_group->zone_capacity; in do_zone_finish()
2154 if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) in do_zone_finish()
2155 block_group->meta_write_pointer = block_group->start + in do_zone_finish()
2156 block_group->zone_capacity; in do_zone_finish()
2157 block_group->free_space_ctl->free_space = 0; in do_zone_finish()
2160 spin_unlock(&block_group->lock); in do_zone_finish()
2162 map = block_group->physical_map; in do_zone_finish()
2163 for (i = 0; i < map->num_stripes; i++) { in do_zone_finish()
2164 struct btrfs_device *device = map->stripes[i].dev; in do_zone_finish()
2165 const u64 physical = map->stripes[i].physical; in do_zone_finish()
2166 struct btrfs_zoned_device_info *zinfo = device->zone_info; in do_zone_finish()
2168 if (zinfo->max_active_zones == 0) in do_zone_finish()
2171 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, in do_zone_finish()
2173 zinfo->zone_size >> SECTOR_SHIFT, in do_zone_finish()
2179 if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) in do_zone_finish()
2180 zinfo->reserved_active_zones++; in do_zone_finish()
2187 spin_lock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2188 ASSERT(!list_empty(&block_group->active_bg_list)); in do_zone_finish()
2189 list_del_init(&block_group->active_bg_list); in do_zone_finish()
2190 spin_unlock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2195 clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in do_zone_finish()
2202 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_finish()
2210 struct btrfs_fs_info *fs_info = fs_devices->fs_info; in btrfs_can_activate_zone()
2218 mutex_lock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2219 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2220 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { in btrfs_can_activate_zone()
2221 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_can_activate_zone()
2224 if (!device->bdev) in btrfs_can_activate_zone()
2227 if (!zinfo->max_active_zones) { in btrfs_can_activate_zone()
2233 reserved = zinfo->reserved_active_zones; in btrfs_can_activate_zone()
2237 ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved)); in btrfs_can_activate_zone()
2240 ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved)); in btrfs_can_activate_zone()
2246 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2247 mutex_unlock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2250 set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in btrfs_can_activate_zone()
2267 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) in btrfs_zone_finish_endio()
2268 min_alloc_bytes = fs_info->sectorsize; in btrfs_zone_finish_endio()
2270 min_alloc_bytes = fs_info->nodesize; in btrfs_zone_finish_endio()
2272 /* Bail out if we can allocate more data from this block group. */ in btrfs_zone_finish_endio()
2274 block_group->start + block_group->zone_capacity) in btrfs_zone_finish_endio()
2288 wait_on_extent_buffer_writeback(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2289 free_extent_buffer(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2290 btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length); in btrfs_zone_finish_endio_workfn()
2297 if (!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &bg->runtime_flags) || in btrfs_schedule_zone_finish_bg()
2298 eb->start + eb->len * 2 <= bg->start + bg->zone_capacity) in btrfs_schedule_zone_finish_bg()
2301 if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) { in btrfs_schedule_zone_finish_bg()
2302 btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing", in btrfs_schedule_zone_finish_bg()
2303 bg->start); in btrfs_schedule_zone_finish_bg()
2309 atomic_inc(&eb->refs); in btrfs_schedule_zone_finish_bg()
2310 bg->last_eb = eb; in btrfs_schedule_zone_finish_bg()
2311 INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); in btrfs_schedule_zone_finish_bg()
2312 queue_work(system_unbound_wq, &bg->zone_finish_work); in btrfs_schedule_zone_finish_bg()
2317 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_clear_data_reloc_bg()
2319 spin_lock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2320 if (fs_info->data_reloc_bg == bg->start) in btrfs_clear_data_reloc_bg()
2321 fs_info->data_reloc_bg = 0; in btrfs_clear_data_reloc_bg()
2322 spin_unlock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2327 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_free_zone_cache()
2333 mutex_lock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2334 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_free_zone_cache()
2335 if (device->zone_info) { in btrfs_free_zone_cache()
2336 vfree(device->zone_info->zone_cache); in btrfs_free_zone_cache()
2337 device->zone_info->zone_cache = NULL; in btrfs_free_zone_cache()
2340 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2345 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_zoned_should_reclaim()
2353 if (fs_info->bg_reclaim_threshold == 0) in btrfs_zoned_should_reclaim()
2356 mutex_lock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2357 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_zoned_should_reclaim()
2358 if (!device->bdev) in btrfs_zoned_should_reclaim()
2361 total += device->disk_total_bytes; in btrfs_zoned_should_reclaim()
2362 used += device->bytes_used; in btrfs_zoned_should_reclaim()
2364 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2367 return factor >= fs_info->bg_reclaim_threshold; in btrfs_zoned_should_reclaim()
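A standalone sketch of the threshold check in btrfs_zoned_should_reclaim() above: sum the zoned devices' total and used bytes and reclaim once the used percentage reaches bg_reclaim_threshold. The capacities and the threshold below are example values.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t total = 4ULL << 40;		/* 4 TiB of zoned capacity */
	uint64_t used  = 3ULL << 40;		/* 3 TiB allocated to chunks */
	unsigned int bg_reclaim_threshold = 75;	/* percent, example value */

	uint64_t factor = used * 100 / total;

	printf("used %llu%%, reclaim: %s\n", (unsigned long long)factor,
	       factor >= bg_reclaim_threshold ? "yes" : "no");
	return 0;
}

With 75% of the device capacity consumed and a 75% threshold, background reclaim of zoned block groups would be triggered.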
2379 /* It should be called on a previous data relocation block group. */ in btrfs_zoned_release_data_reloc_bg()
2380 ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); in btrfs_zoned_release_data_reloc_bg()
2382 spin_lock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2383 if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) in btrfs_zoned_release_data_reloc_bg()
2387 if (block_group->start + block_group->alloc_offset == logical + length) { in btrfs_zoned_release_data_reloc_bg()
2389 * Now, release this block group for further allocations and in btrfs_zoned_release_data_reloc_bg()
2393 &block_group->runtime_flags); in btrfs_zoned_release_data_reloc_bg()
2397 spin_unlock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2408 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
2409 list_for_each_entry(block_group, &fs_info->zone_active_bgs, in btrfs_zone_finish_one_bg()
2413 spin_lock(&block_group->lock); in btrfs_zone_finish_one_bg()
2414 if (block_group->reserved || block_group->alloc_offset == 0 || in btrfs_zone_finish_one_bg()
2415 (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || in btrfs_zone_finish_one_bg()
2416 test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in btrfs_zone_finish_one_bg()
2417 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2421 avail = block_group->zone_capacity - block_group->alloc_offset; in btrfs_zone_finish_one_bg()
2429 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2431 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
2449 if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) in btrfs_zoned_activate_one_bg()
2456 down_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2458 list_for_each_entry(bg, &space_info->block_groups[index], in btrfs_zoned_activate_one_bg()
2460 if (!spin_trylock(&bg->lock)) in btrfs_zoned_activate_one_bg()
2464 &bg->runtime_flags)) { in btrfs_zoned_activate_one_bg()
2465 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2468 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2471 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2478 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2494 * Reserve zones for one metadata block group, one tree-log block group, and one
2495 * system block group.
2499 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_check_active_zone_reservation()
2502 /* Reserve zones for normal SINGLE metadata and tree-log block group. */ in btrfs_check_active_zone_reservation()
2504 /* Reserve a zone for SINGLE system block group. */ in btrfs_check_active_zone_reservation()
2507 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_active_zone_reservation()
2514 if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2516 if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2520 mutex_lock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2521 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_check_active_zone_reservation()
2522 if (!device->bdev) in btrfs_check_active_zone_reservation()
2525 device->zone_info->reserved_active_zones = in btrfs_check_active_zone_reservation()
2528 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2530 /* Release reservation for currently active block groups. */ in btrfs_check_active_zone_reservation()
2531 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()
2532 list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { in btrfs_check_active_zone_reservation()
2533 struct map_lookup *map = block_group->physical_map; in btrfs_check_active_zone_reservation()
2535 if (!(block_group->flags & in btrfs_check_active_zone_reservation()
2539 for (int i = 0; i < map->num_stripes; i++) in btrfs_check_active_zone_reservation() local
2540 map->stripes[i].dev->zone_info->reserved_active_zones--; in btrfs_check_active_zone_reservation()
2542 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()