19888c340SDavid Sterba /* SPDX-License-Identifier: GPL-2.0 */ 253b381b3SDavid Woodhouse /* 353b381b3SDavid Woodhouse * Copyright (C) 2012 Fusion-io All rights reserved. 453b381b3SDavid Woodhouse * Copyright (C) 2012 Intel Corp. All rights reserved. 553b381b3SDavid Woodhouse */ 653b381b3SDavid Woodhouse 79888c340SDavid Sterba #ifndef BTRFS_RAID56_H 89888c340SDavid Sterba #define BTRFS_RAID56_H 99888c340SDavid Sterba 10*b8bea09aSQu Wenruo #include <linux/workqueue.h> 11*b8bea09aSQu Wenruo #include "volumes.h" 12*b8bea09aSQu Wenruo 13*b8bea09aSQu Wenruo enum btrfs_rbio_ops { 14*b8bea09aSQu Wenruo BTRFS_RBIO_WRITE, 15*b8bea09aSQu Wenruo BTRFS_RBIO_READ_REBUILD, 16*b8bea09aSQu Wenruo BTRFS_RBIO_PARITY_SCRUB, 17*b8bea09aSQu Wenruo BTRFS_RBIO_REBUILD_MISSING, 18*b8bea09aSQu Wenruo }; 19*b8bea09aSQu Wenruo 20*b8bea09aSQu Wenruo struct btrfs_raid_bio { 21*b8bea09aSQu Wenruo struct btrfs_io_context *bioc; 22*b8bea09aSQu Wenruo 23*b8bea09aSQu Wenruo /* 24*b8bea09aSQu Wenruo * While we're doing RMW on a stripe we put it into a hash table so we 25*b8bea09aSQu Wenruo * can lock the stripe and merge more rbios into it. 26*b8bea09aSQu Wenruo */ 27*b8bea09aSQu Wenruo struct list_head hash_list; 28*b8bea09aSQu Wenruo 29*b8bea09aSQu Wenruo /* LRU list for the stripe cache */ 30*b8bea09aSQu Wenruo struct list_head stripe_cache; 31*b8bea09aSQu Wenruo 32*b8bea09aSQu Wenruo /* For scheduling work in the helper threads */ 33*b8bea09aSQu Wenruo struct work_struct work; 34*b8bea09aSQu Wenruo 35*b8bea09aSQu Wenruo /* 36*b8bea09aSQu Wenruo * bio_list and bio_list_lock are used to add more bios into the stripe 37*b8bea09aSQu Wenruo * in hopes of avoiding the full RMW 38*b8bea09aSQu Wenruo */ 39*b8bea09aSQu Wenruo struct bio_list bio_list; 40*b8bea09aSQu Wenruo spinlock_t bio_list_lock; 41*b8bea09aSQu Wenruo 42*b8bea09aSQu Wenruo /* 43*b8bea09aSQu Wenruo * Also protected by the bio_list_lock, the plug list is used by the 44*b8bea09aSQu Wenruo * plugging code to collect partial bios while plugged. The stripe 45*b8bea09aSQu Wenruo * locking code also uses it to hand off the stripe lock to the next 46*b8bea09aSQu Wenruo * pending IO. 47*b8bea09aSQu Wenruo */ 48*b8bea09aSQu Wenruo struct list_head plug_list; 49*b8bea09aSQu Wenruo 50*b8bea09aSQu Wenruo /* Flags that tell us if it is safe to merge with this bio. */ 51*b8bea09aSQu Wenruo unsigned long flags; 52*b8bea09aSQu Wenruo 53*b8bea09aSQu Wenruo /* 54*b8bea09aSQu Wenruo * Set if we're doing a parity rebuild for a read from higher up, which 55*b8bea09aSQu Wenruo * is handled differently from a parity rebuild as part of RMW. 56*b8bea09aSQu Wenruo */ 57*b8bea09aSQu Wenruo enum btrfs_rbio_ops operation; 58*b8bea09aSQu Wenruo 59*b8bea09aSQu Wenruo /* Size of each individual stripe on disk */ 60*b8bea09aSQu Wenruo u32 stripe_len; 61*b8bea09aSQu Wenruo 62*b8bea09aSQu Wenruo /* How many pages there are for the full stripe including P/Q */ 63*b8bea09aSQu Wenruo u16 nr_pages; 64*b8bea09aSQu Wenruo 65*b8bea09aSQu Wenruo /* How many sectors there are for the full stripe including P/Q */ 66*b8bea09aSQu Wenruo u16 nr_sectors; 67*b8bea09aSQu Wenruo 68*b8bea09aSQu Wenruo /* Number of data stripes (no p/q) */ 69*b8bea09aSQu Wenruo u8 nr_data; 70*b8bea09aSQu Wenruo 71*b8bea09aSQu Wenruo /* Numer of all stripes (including P/Q) */ 72*b8bea09aSQu Wenruo u8 real_stripes; 73*b8bea09aSQu Wenruo 74*b8bea09aSQu Wenruo /* How many pages there are for each stripe */ 75*b8bea09aSQu Wenruo u8 stripe_npages; 76*b8bea09aSQu Wenruo 77*b8bea09aSQu Wenruo /* How many sectors there are for each stripe */ 78*b8bea09aSQu Wenruo u8 stripe_nsectors; 79*b8bea09aSQu Wenruo 80*b8bea09aSQu Wenruo /* First bad stripe, -1 means no corruption */ 81*b8bea09aSQu Wenruo s8 faila; 82*b8bea09aSQu Wenruo 83*b8bea09aSQu Wenruo /* Second bad stripe (for RAID6 use) */ 84*b8bea09aSQu Wenruo s8 failb; 85*b8bea09aSQu Wenruo 86*b8bea09aSQu Wenruo /* Stripe number that we're scrubbing */ 87*b8bea09aSQu Wenruo u8 scrubp; 88*b8bea09aSQu Wenruo 89*b8bea09aSQu Wenruo /* 90*b8bea09aSQu Wenruo * Size of all the bios in the bio_list. This helps us decide if the 91*b8bea09aSQu Wenruo * rbio maps to a full stripe or not. 92*b8bea09aSQu Wenruo */ 93*b8bea09aSQu Wenruo int bio_list_bytes; 94*b8bea09aSQu Wenruo 95*b8bea09aSQu Wenruo int generic_bio_cnt; 96*b8bea09aSQu Wenruo 97*b8bea09aSQu Wenruo refcount_t refs; 98*b8bea09aSQu Wenruo 99*b8bea09aSQu Wenruo atomic_t stripes_pending; 100*b8bea09aSQu Wenruo 101*b8bea09aSQu Wenruo atomic_t error; 102*b8bea09aSQu Wenruo 103*b8bea09aSQu Wenruo /* Bitmap to record which horizontal stripe has data */ 104*b8bea09aSQu Wenruo unsigned long dbitmap; 105*b8bea09aSQu Wenruo 106*b8bea09aSQu Wenruo /* Allocated with stripe_nsectors-many bits for finish_*() calls */ 107*b8bea09aSQu Wenruo unsigned long finish_pbitmap; 108*b8bea09aSQu Wenruo 109*b8bea09aSQu Wenruo /* 110*b8bea09aSQu Wenruo * These are two arrays of pointers. We allocate the rbio big enough 111*b8bea09aSQu Wenruo * to hold them both and setup their locations when the rbio is 112*b8bea09aSQu Wenruo * allocated. 113*b8bea09aSQu Wenruo */ 114*b8bea09aSQu Wenruo 115*b8bea09aSQu Wenruo /* 116*b8bea09aSQu Wenruo * Pointers to pages that we allocated for reading/writing stripes 117*b8bea09aSQu Wenruo * directly from the disk (including P/Q). 118*b8bea09aSQu Wenruo */ 119*b8bea09aSQu Wenruo struct page **stripe_pages; 120*b8bea09aSQu Wenruo 121*b8bea09aSQu Wenruo /* Pointers to the sectors in the bio_list, for faster lookup */ 122*b8bea09aSQu Wenruo struct sector_ptr *bio_sectors; 123*b8bea09aSQu Wenruo 124*b8bea09aSQu Wenruo /* 125*b8bea09aSQu Wenruo * For subpage support, we need to map each sector to above 126*b8bea09aSQu Wenruo * stripe_pages. 127*b8bea09aSQu Wenruo */ 128*b8bea09aSQu Wenruo struct sector_ptr *stripe_sectors; 129*b8bea09aSQu Wenruo 130*b8bea09aSQu Wenruo /* Allocated with real_stripes-many pointers for finish_*() calls */ 131*b8bea09aSQu Wenruo void **finish_pointers; 132*b8bea09aSQu Wenruo }; 133*b8bea09aSQu Wenruo 134*b8bea09aSQu Wenruo /* 135*b8bea09aSQu Wenruo * For trace event usage only. Records useful debug info for each bio submitted 136*b8bea09aSQu Wenruo * by RAID56 to each physical device. 137*b8bea09aSQu Wenruo * 138*b8bea09aSQu Wenruo * No matter signed or not, (-1) is always the one indicating we can not grab 139*b8bea09aSQu Wenruo * the proper stripe number. 140*b8bea09aSQu Wenruo */ 141*b8bea09aSQu Wenruo struct raid56_bio_trace_info { 142*b8bea09aSQu Wenruo u64 devid; 143*b8bea09aSQu Wenruo 144*b8bea09aSQu Wenruo /* The offset inside the stripe. (<= STRIPE_LEN) */ 145*b8bea09aSQu Wenruo u32 offset; 146*b8bea09aSQu Wenruo 147*b8bea09aSQu Wenruo /* 148*b8bea09aSQu Wenruo * Stripe number. 149*b8bea09aSQu Wenruo * 0 is the first data stripe, and nr_data for P stripe, 150*b8bea09aSQu Wenruo * nr_data + 1 for Q stripe. 151*b8bea09aSQu Wenruo * >= real_stripes for 152*b8bea09aSQu Wenruo */ 153*b8bea09aSQu Wenruo u8 stripe_nr; 154*b8bea09aSQu Wenruo }; 155*b8bea09aSQu Wenruo 15672ad8131SDavid Sterba static inline int nr_parity_stripes(const struct map_lookup *map) 15753b381b3SDavid Woodhouse { 15853b381b3SDavid Woodhouse if (map->type & BTRFS_BLOCK_GROUP_RAID5) 15953b381b3SDavid Woodhouse return 1; 16053b381b3SDavid Woodhouse else if (map->type & BTRFS_BLOCK_GROUP_RAID6) 16153b381b3SDavid Woodhouse return 2; 16253b381b3SDavid Woodhouse else 16353b381b3SDavid Woodhouse return 0; 16453b381b3SDavid Woodhouse } 16553b381b3SDavid Woodhouse 16672ad8131SDavid Sterba static inline int nr_data_stripes(const struct map_lookup *map) 16753b381b3SDavid Woodhouse { 16853b381b3SDavid Woodhouse return map->num_stripes - nr_parity_stripes(map); 16953b381b3SDavid Woodhouse } 170*b8bea09aSQu Wenruo 17153b381b3SDavid Woodhouse #define RAID5_P_STRIPE ((u64)-2) 17253b381b3SDavid Woodhouse #define RAID6_Q_STRIPE ((u64)-1) 17353b381b3SDavid Woodhouse 17453b381b3SDavid Woodhouse #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ 17553b381b3SDavid Woodhouse ((x) == RAID6_Q_STRIPE)) 17653b381b3SDavid Woodhouse 1775a6ac9eaSMiao Xie struct btrfs_device; 1785a6ac9eaSMiao Xie 1796a258d72SQu Wenruo int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, 180cc353a8bSQu Wenruo u32 stripe_len, int mirror_num, int generic_io); 181cc353a8bSQu Wenruo int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len); 18253b381b3SDavid Woodhouse 183b4ee1782SOmar Sandoval void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, 1846346f6bfSQu Wenruo unsigned int pgoff, u64 logical); 185b4ee1782SOmar Sandoval 1866a258d72SQu Wenruo struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, 187cc353a8bSQu Wenruo struct btrfs_io_context *bioc, u32 stripe_len, 1888e5cfb55SZhao Lei struct btrfs_device *scrub_dev, 1895a6ac9eaSMiao Xie unsigned long *dbitmap, int stripe_nsectors); 1905a6ac9eaSMiao Xie void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); 1915a6ac9eaSMiao Xie 192b4ee1782SOmar Sandoval struct btrfs_raid_bio * 1936a258d72SQu Wenruo raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc, 1946a258d72SQu Wenruo u64 length); 195b4ee1782SOmar Sandoval void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); 196b4ee1782SOmar Sandoval 19753b381b3SDavid Woodhouse int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); 19853b381b3SDavid Woodhouse void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); 1999888c340SDavid Sterba 20053b381b3SDavid Woodhouse #endif 201