#ifndef _RAID1_H
#define _RAID1_H

/*
 * each barrier unit is 64MB in size for now
 * note: it must be larger than RESYNC_DEPTH
 */
#define BARRIER_UNIT_SECTOR_BITS	17
#define BARRIER_UNIT_SECTOR_SIZE	(1<<17)
/*
 * In struct r1conf, the following members are related to I/O barrier
 * buckets:
 *	atomic_t	*nr_pending;
 *	atomic_t	*nr_waiting;
 *	atomic_t	*nr_queued;
 *	atomic_t	*barrier;
 * Each of them points to an array of atomic_t variables. Each array is
 * designed to have BARRIER_BUCKETS_NR elements and occupy a single
 * memory page. The data width of an atomic_t variable is 4 bytes, equal
 * to 1<<(ilog2(sizeof(atomic_t))), so BARRIER_BUCKETS_NR_BITS is defined
 * as (PAGE_SHIFT - ilog2(sizeof(atomic_t))) to make sure an array of
 * BARRIER_BUCKETS_NR atomic_t variables exactly occupies a single
 * memory page.
 */
#define BARRIER_BUCKETS_NR_BITS		(PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR		(1<<BARRIER_BUCKETS_NR_BITS)
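/*
 * Worked example (assuming 4KB pages and a 4-byte atomic_t, as the
 * comment above assumes): PAGE_SHIFT is 12 and ilog2(sizeof(atomic_t))
 * is 2, so BARRIER_BUCKETS_NR_BITS is 12 - 2 = 10 and BARRIER_BUCKETS_NR
 * is 1<<10 = 1024; 1024 buckets * 4 bytes = 4096 bytes, filling the
 * page exactly. Likewise, one barrier unit covers 1<<17 sectors of
 * 512 bytes each, i.e. 64MB, matching the comment at the top of this
 * file.
 */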

struct raid1_info {
	struct md_rdev	*rdev;
	sector_t	head_position;

	/* When choosing the best device for a read (read_balance())
	 * we try to keep sequential reads on the same device.
	 */
	sector_t	next_seq_sect;
	sector_t	seq_start;
};

/*
 * memory pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used, as it can change while a pool is active.
 * These two values are stored in a kmalloc'ed struct.
 * The 'raid_disks' here is twice the raid_disks in r1conf.
 * This allows space for each 'real' device to have a replacement in the
 * second half of the array.
 */

struct pool_info {
	struct mddev	*mddev;
	int		raid_disks;
};

struct r1conf {
	struct mddev		*mddev;
	struct raid1_info	*mirrors;	/* twice 'raid_disks' to
						 * allow for replacements.
						 */
	int			raid_disks;

	spinlock_t		device_lock;

	/* list of 'struct r1bio' that need to be processed by raid1d,
	 * whether to retry a read, write out a resync or recovery
	 * block, or anything else.
	 */
	struct list_head	retry_list;
	/* A separate list of r1bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the
	 * superblock needs to be written.
	 */
	struct list_head	bio_end_io_list;

	/* queue of pending writes to be submitted on unplug */
	struct bio_list		pending_bio_list;
	int			pending_count;

	/* for use when syncing mirrors:
	 * We don't allow both normal IO and resync/recovery IO at
	 * the same time - resync/recovery can only happen when there
	 * is no other IO. So when either is active, the other has to wait.
	 * See the more detailed description near raise_barrier() in raid1.c.
	 */
	wait_queue_head_t	wait_barrier;
	spinlock_t		resync_lock;
	atomic_t		nr_sync_pending;
	atomic_t		*nr_pending;
	atomic_t		*nr_waiting;
	atomic_t		*nr_queued;
	atomic_t		*barrier;
	int			array_frozen;

	/* Set to 1 if a full sync is needed (e.g. a fresh device was
	 * added). Cleared when a sync completes.
	 */
	int			fullsync;

	/* When the same as mddev->recovery_disabled we don't allow
	 * recovery to be attempted, as we expect a read error.
	 */
	int			recovery_disabled;

	/* poolinfo contains information about the contents of the
	 * mempools - it changes when the array grows or shrinks.
	 */
	struct pool_info	*poolinfo;
	mempool_t		*r1bio_pool;
	mempool_t		*r1buf_pool;

	struct bio_set		*bio_split;

	/* temporary buffer for synchronous IO when attempting to repair
	 * a read error.
	 */
	struct page		*tmppage;

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread	*thread;

	/* Keep track of the cluster resync window to send to other
	 * nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;

};

/*
 * this is our 'private' RAID1 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID1 operation, and about their status:
 */

struct r1bio {
	atomic_t		remaining;	/* 'have we finished' count,
						 * used from IRQ handlers
						 */
	atomic_t		behind_remaining; /* number of write-behind ios
						   * remaining in this BehindIO
						   * request
						   */
	sector_t		sector;
	int			sectors;
	unsigned long		state;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int			read_disk;

	struct list_head	retry_list;

	/*
	 * When R1BIO_BehindIO is set, we store pages for write behind
	 * in behind_master_bio.
	 */
	struct bio		*behind_master_bio;

	/*
	 * if the IO is in WRITE direction, then multiple bios are used.
	 * We choose the number when they are allocated.
	 */
	struct bio		*bios[0];
	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced */
};

/* bits for r1bio.state */
enum r1bio_state {
	R1BIO_Uptodate,
	R1BIO_IsSync,
	R1BIO_Degraded,
	R1BIO_BehindIO,
	/* Set ReadError on bios that experience a read error so that
	 * raid1d knows what to do with them.
	 */
	R1BIO_ReadError,
	/* For write-behind requests, we call bi_end_io when the last
	 * non-write-behind device completes, provided any write was
	 * successful. Otherwise we call it when any write-behind write
	 * succeeds; failing that, we call it with failure when the last
	 * write completes (and all writes failed). Record that bi_end_io
	 * was called with this flag...
	 */
	R1BIO_Returned,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R1BIO_MadeGood,
	R1BIO_WriteError,
	R1BIO_FailFast,
};

static inline int sector_to_idx(sector_t sector)
{
	return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
			 BARRIER_BUCKETS_NR_BITS);
}
#endif /* _RAID1_H */
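/*
 * Usage sketch (illustrative only, not part of this header): raid1.c
 * hashes a bio's starting sector into a barrier bucket with
 * sector_to_idx() and accounts the request against that bucket,
 * roughly as follows. 'conf' and 'bio' stand in for the caller's
 * context; the real logic lives in wait_barrier()/allow_barrier()
 * in raid1.c:
 *
 *	int idx = sector_to_idx(bio->bi_iter.bi_sector);
 *
 *	atomic_inc(&conf->nr_pending[idx]);
 *	... submit the I/O ...
 *	atomic_dec(&conf->nr_pending[idx]);
 *	wake_up(&conf->wait_barrier);
 */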