xref: /openbmc/linux/fs/btrfs/raid56.h (revision b8bea09a)
19888c340SDavid Sterba /* SPDX-License-Identifier: GPL-2.0 */
253b381b3SDavid Woodhouse /*
353b381b3SDavid Woodhouse  * Copyright (C) 2012 Fusion-io  All rights reserved.
453b381b3SDavid Woodhouse  * Copyright (C) 2012 Intel Corp. All rights reserved.
553b381b3SDavid Woodhouse  */
653b381b3SDavid Woodhouse 
79888c340SDavid Sterba #ifndef BTRFS_RAID56_H
89888c340SDavid Sterba #define BTRFS_RAID56_H
99888c340SDavid Sterba 
10*b8bea09aSQu Wenruo #include <linux/workqueue.h>
11*b8bea09aSQu Wenruo #include "volumes.h"
12*b8bea09aSQu Wenruo 
/*
 * The operation a struct btrfs_raid_bio (rbio) carries out; stored in
 * btrfs_raid_bio::operation and used to decide how the stripe is processed.
 */
enum btrfs_rbio_ops {
	/* Write to the stripe (full-stripe write or read-modify-write) */
	BTRFS_RBIO_WRITE,
	/* Rebuild data for a read from higher up that hit a bad/missing stripe */
	BTRFS_RBIO_READ_REBUILD,
	/* Scrub: verify/repair the parity stripes (see the scrub rbio API below) */
	BTRFS_RBIO_PARITY_SCRUB,
	/* Rebuild a missing device's stripe (see raid56_alloc_missing_rbio()) */
	BTRFS_RBIO_REBUILD_MISSING,
};
19*b8bea09aSQu Wenruo 
/*
 * Main control structure for one RAID5/6 full-stripe operation.
 *
 * An rbio tracks one full stripe (all data stripes plus P, and Q for
 * RAID6) while it is being read, written, rebuilt or scrubbed.
 */
struct btrfs_raid_bio {
	/* The io_context describing the physical layout of this full stripe */
	struct btrfs_io_context *bioc;

	/*
	 * While we're doing RMW on a stripe we put it into a hash table so we
	 * can lock the stripe and merge more rbios into it.
	 */
	struct list_head hash_list;

	/* LRU list for the stripe cache */
	struct list_head stripe_cache;

	/* For scheduling work in the helper threads */
	struct work_struct work;

	/*
	 * bio_list and bio_list_lock are used to add more bios into the stripe
	 * in hopes of avoiding the full RMW
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/*
	 * Also protected by the bio_list_lock, the plug list is used by the
	 * plugging code to collect partial bios while plugged.  The stripe
	 * locking code also uses it to hand off the stripe lock to the next
	 * pending IO.
	 */
	struct list_head plug_list;

	/* Flags that tell us if it is safe to merge with this bio. */
	unsigned long flags;

	/*
	 * Set if we're doing a parity rebuild for a read from higher up, which
	 * is handled differently from a parity rebuild as part of RMW.
	 */
	enum btrfs_rbio_ops operation;

	/* Size of each individual stripe on disk */
	u32 stripe_len;

	/* How many pages there are for the full stripe including P/Q */
	u16 nr_pages;

	/* How many sectors there are for the full stripe including P/Q */
	u16 nr_sectors;

	/* Number of data stripes (no p/q) */
	u8 nr_data;

	/* Number of all stripes (including P/Q) */
	u8 real_stripes;

	/* How many pages there are for each stripe */
	u8 stripe_npages;

	/* How many sectors there are for each stripe */
	u8 stripe_nsectors;

	/* First bad stripe, -1 means no corruption */
	s8 faila;

	/* Second bad stripe (for RAID6 use), -1 when unused */
	s8 failb;

	/* Stripe number that we're scrubbing  */
	u8 scrubp;

	/*
	 * Size of all the bios in the bio_list.  This helps us decide if the
	 * rbio maps to a full stripe or not.
	 */
	int bio_list_bytes;

	/* Count of generic (non-plugged) bios attached to this rbio */
	int generic_bio_cnt;

	/* Lifetime reference count of this rbio */
	refcount_t refs;

	/* Number of per-stripe IOs still in flight */
	atomic_t stripes_pending;

	/* Number of IO errors seen so far on this rbio */
	atomic_t error;

	/* Bitmap to record which horizontal stripe has data */
	unsigned long dbitmap;

	/* Allocated with stripe_nsectors-many bits for finish_*() calls */
	unsigned long finish_pbitmap;

	/*
	 * These are two arrays of pointers.  We allocate the rbio big enough
	 * to hold them both and setup their locations when the rbio is
	 * allocated.
	 */

	/*
	 * Pointers to pages that we allocated for reading/writing stripes
	 * directly from the disk (including P/Q).
	 */
	struct page **stripe_pages;

	/* Pointers to the sectors in the bio_list, for faster lookup */
	struct sector_ptr *bio_sectors;

	/*
	 * For subpage support, we need to map each sector to above
	 * stripe_pages.
	 */
	struct sector_ptr *stripe_sectors;

	/* Allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;
};
133*b8bea09aSQu Wenruo 
134*b8bea09aSQu Wenruo /*
135*b8bea09aSQu Wenruo  * For trace event usage only. Records useful debug info for each bio submitted
136*b8bea09aSQu Wenruo  * by RAID56 to each physical device.
137*b8bea09aSQu Wenruo  *
138*b8bea09aSQu Wenruo  * No matter signed or not, (-1) is always the one indicating we can not grab
139*b8bea09aSQu Wenruo  * the proper stripe number.
140*b8bea09aSQu Wenruo  */
struct raid56_bio_trace_info {
	/* Device the bio is submitted to */
	u64 devid;

	/* The offset inside the stripe. (<= STRIPE_LEN) */
	u32 offset;

	/*
	 * Stripe number.
	 * 0 is the first data stripe, and nr_data for P stripe,
	 * nr_data + 1 for Q stripe.
	 * NOTE(review): the original comment is truncated here (">= real_stripes
	 * for"); given the (-1)-as-unknown convention stated above, a value
	 * >= real_stripes presumably marks an unknown stripe — confirm against
	 * the trace-event users.
	 */
	u8 stripe_nr;
};
155*b8bea09aSQu Wenruo 
15672ad8131SDavid Sterba static inline int nr_parity_stripes(const struct map_lookup *map)
15753b381b3SDavid Woodhouse {
15853b381b3SDavid Woodhouse 	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
15953b381b3SDavid Woodhouse 		return 1;
16053b381b3SDavid Woodhouse 	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
16153b381b3SDavid Woodhouse 		return 2;
16253b381b3SDavid Woodhouse 	else
16353b381b3SDavid Woodhouse 		return 0;
16453b381b3SDavid Woodhouse }
16553b381b3SDavid Woodhouse 
16672ad8131SDavid Sterba static inline int nr_data_stripes(const struct map_lookup *map)
16753b381b3SDavid Woodhouse {
16853b381b3SDavid Woodhouse 	return map->num_stripes - nr_parity_stripes(map);
16953b381b3SDavid Woodhouse }
170*b8bea09aSQu Wenruo 
17153b381b3SDavid Woodhouse #define RAID5_P_STRIPE ((u64)-2)
17253b381b3SDavid Woodhouse #define RAID6_Q_STRIPE ((u64)-1)
17353b381b3SDavid Woodhouse 
17453b381b3SDavid Woodhouse #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) ||		\
17553b381b3SDavid Woodhouse 			     ((x) == RAID6_Q_STRIPE))
17653b381b3SDavid Woodhouse 
1775a6ac9eaSMiao Xie struct btrfs_device;
1785a6ac9eaSMiao Xie 
1796a258d72SQu Wenruo int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
180cc353a8bSQu Wenruo 			  u32 stripe_len, int mirror_num, int generic_io);
181cc353a8bSQu Wenruo int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len);
18253b381b3SDavid Woodhouse 
183b4ee1782SOmar Sandoval void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
1846346f6bfSQu Wenruo 			    unsigned int pgoff, u64 logical);
185b4ee1782SOmar Sandoval 
1866a258d72SQu Wenruo struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
187cc353a8bSQu Wenruo 				struct btrfs_io_context *bioc, u32 stripe_len,
1888e5cfb55SZhao Lei 				struct btrfs_device *scrub_dev,
1895a6ac9eaSMiao Xie 				unsigned long *dbitmap, int stripe_nsectors);
1905a6ac9eaSMiao Xie void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
1915a6ac9eaSMiao Xie 
192b4ee1782SOmar Sandoval struct btrfs_raid_bio *
1936a258d72SQu Wenruo raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
1946a258d72SQu Wenruo 			  u64 length);
195b4ee1782SOmar Sandoval void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
196b4ee1782SOmar Sandoval 
19753b381b3SDavid Woodhouse int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
19853b381b3SDavid Woodhouse void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
1999888c340SDavid Sterba 
20053b381b3SDavid Woodhouse #endif
201