xref: /openbmc/linux/fs/btrfs/raid56.h (revision 3a3c7a7f)
19888c340SDavid Sterba /* SPDX-License-Identifier: GPL-2.0 */
253b381b3SDavid Woodhouse /*
353b381b3SDavid Woodhouse  * Copyright (C) 2012 Fusion-io  All rights reserved.
453b381b3SDavid Woodhouse  * Copyright (C) 2012 Intel Corp. All rights reserved.
553b381b3SDavid Woodhouse  */
653b381b3SDavid Woodhouse 
79888c340SDavid Sterba #ifndef BTRFS_RAID56_H
89888c340SDavid Sterba #define BTRFS_RAID56_H
99888c340SDavid Sterba 
10b8bea09aSQu Wenruo #include <linux/workqueue.h>
11b8bea09aSQu Wenruo #include "volumes.h"
12b8bea09aSQu Wenruo 
/* The kind of operation an rbio (struct btrfs_raid_bio) is carrying out. */
enum btrfs_rbio_ops {
	/* Regular (read-modify-)write of a full stripe. */
	BTRFS_RBIO_WRITE,
	/* Rebuild missing data for a read from higher up the stack. */
	BTRFS_RBIO_READ_REBUILD,
	/* Verify/repair parity as part of scrub. */
	BTRFS_RBIO_PARITY_SCRUB,
};
18b8bea09aSQu Wenruo 
/*
 * State for one in-flight RAID5/6 full-stripe operation (write, read
 * rebuild or parity scrub).  An rbio collects the bios targeting a single
 * full stripe so the stripe can be processed as one unit.
 */
struct btrfs_raid_bio {
	/* I/O context describing the physical stripes of this full stripe. */
	struct btrfs_io_context *bioc;

	/*
	 * While we're doing RMW on a stripe we put it into a hash table so we
	 * can lock the stripe and merge more rbios into it.
	 */
	struct list_head hash_list;

	/* LRU list for the stripe cache */
	struct list_head stripe_cache;

	/* For scheduling work in the helper threads */
	struct work_struct work;

	/*
	 * bio_list and bio_list_lock are used to add more bios into the stripe
	 * in hopes of avoiding the full RMW
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/*
	 * Also protected by the bio_list_lock, the plug list is used by the
	 * plugging code to collect partial bios while plugged.  The stripe
	 * locking code also uses it to hand off the stripe lock to the next
	 * pending IO.
	 */
	struct list_head plug_list;

	/* Flags that tell us if it is safe to merge with this bio. */
	unsigned long flags;

	/*
	 * The operation this rbio performs (see enum btrfs_rbio_ops).  A
	 * parity rebuild for a read from higher up (READ_REBUILD) is handled
	 * differently from a parity rebuild as part of RMW.
	 */
	enum btrfs_rbio_ops operation;

	/* How many pages there are for the full stripe including P/Q */
	u16 nr_pages;

	/* How many sectors there are for the full stripe including P/Q */
	u16 nr_sectors;

	/* Number of data stripes (no p/q) */
	u8 nr_data;

	/* Number of all stripes (including P/Q) */
	u8 real_stripes;

	/* How many pages there are for each stripe */
	u8 stripe_npages;

	/* How many sectors there are for each stripe */
	u8 stripe_nsectors;

	/* Stripe number that we're scrubbing  */
	u8 scrubp;

	/*
	 * Size of all the bios in the bio_list.  This helps us decide if the
	 * rbio maps to a full stripe or not.
	 */
	int bio_list_bytes;

	/* Reference count; the rbio is freed when this drops to zero. */
	refcount_t refs;

	/*
	 * Number of per-stripe I/Os still in flight.
	 * NOTE(review): presumably io_wait below is woken when this reaches
	 * zero — confirm against raid56.c.
	 */
	atomic_t stripes_pending;

	/* Waitqueue for waiting on stripe I/O completion (see above). */
	wait_queue_head_t io_wait;

	/* Bitmap to record which horizontal stripe has data */
	unsigned long dbitmap;

	/* Allocated with stripe_nsectors-many bits for finish_*() calls */
	unsigned long finish_pbitmap;

	/*
	 * These are two arrays of pointers.  We allocate the rbio big enough
	 * to hold them both and setup their locations when the rbio is
	 * allocated.
	 */

	/*
	 * Pointers to pages that we allocated for reading/writing stripes
	 * directly from the disk (including P/Q).
	 */
	struct page **stripe_pages;

	/* Pointers to the sectors in the bio_list, for faster lookup */
	struct sector_ptr *bio_sectors;

	/*
	 * For subpage support, we need to map each sector to above
	 * stripe_pages.
	 */
	struct sector_ptr *stripe_sectors;

	/* Allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/*
	 * The bitmap recording where IO errors happened.
	 * Each bit is corresponding to one sector in either bio_sectors[] or
	 * stripe_sectors[] array.
	 *
	 * The reason we don't use another bit in sector_ptr is, we have two
	 * arrays of sectors, and a lot of IO can use sectors in both arrays.
	 * Thus making it much harder to iterate.
	 */
	unsigned long *error_bitmap;

	/*
	 * Checksum buffer if the rbio is for data.  The buffer should cover
	 * all data sectors (excluding P/Q sectors).
	 */
	u8 *csum_buf;

	/*
	 * Each bit represents if the corresponding sector has data csum found.
	 * Should only cover data sectors (excluding P/Q sectors).
	 */
	unsigned long *csum_bitmap;
};
144b8bea09aSQu Wenruo 
/*
 * For trace event usage only. Records useful debug info for each bio submitted
 * by RAID56 to each physical device.
 *
 * No matter signed or not, (-1) is always the one indicating we can not grab
 * the proper stripe number.
 */
struct raid56_bio_trace_info {
	/* Device id of the physical device the bio was submitted to. */
	u64 devid;

	/* The offset inside the stripe. (<= STRIPE_LEN) */
	u32 offset;

	/*
	 * Stripe number.
	 * 0 is the first data stripe, and nr_data for P stripe,
	 * nr_data + 1 for Q stripe.
	 * Any value >= real_stripes means the stripe number could not be
	 * determined (see the (-1) note above).
	 */
	u8 stripe_nr;
};
166b8bea09aSQu Wenruo 
nr_data_stripes(const struct map_lookup * map)16772ad8131SDavid Sterba static inline int nr_data_stripes(const struct map_lookup *map)
16853b381b3SDavid Woodhouse {
1690b30f719SQu Wenruo 	return map->num_stripes - btrfs_nr_parity_stripes(map->type);
17053b381b3SDavid Woodhouse }
171b8bea09aSQu Wenruo 
nr_bioc_data_stripes(const struct btrfs_io_context * bioc)1724886ff7bSQu Wenruo static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
1734886ff7bSQu Wenruo {
1744886ff7bSQu Wenruo 	return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
1754886ff7bSQu Wenruo }
1764886ff7bSQu Wenruo 
/*
 * Special logical stripe numbers marking the P and Q parity stripes; both
 * are outside the range of valid data stripe numbers.
 */
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

/* True if @x is one of the special parity stripe numbers above. */
#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) ||		\
			     ((x) == RAID6_Q_STRIPE))

struct btrfs_device;

/* Rebuild the data for @bio from parity, reading copy @mirror_num. */
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
			   int mirror_num);
/* Submit a write through the RMW machinery for the stripe in @bioc. */
void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);

/*
 * Allocate an rbio for scrubbing the stripe described by @bioc on device
 * @scrub_dev.  @dbitmap (stripe_nsectors bits) marks the sectors that
 * contain data.
 */
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
				struct btrfs_io_context *bioc,
				struct btrfs_device *scrub_dev,
				unsigned long *dbitmap, int stripe_nsectors);
/* Kick off the scrub work for an rbio allocated above. */
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);

/*
 * Cache @data_pages, located at logical address @data_logical, into the
 * rbio.  NOTE(review): semantics inferred from the name — confirm against
 * the definition in raid56.c.
 */
void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
				    struct page **data_pages, u64 data_logical);

/*
 * Allocate/free the per-fs stripe hash table used to lock stripes and
 * merge rbios (see hash_list in struct btrfs_raid_bio).
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
2009888c340SDavid Sterba 
20153b381b3SDavid Woodhouse #endif
202