1 #ifndef _RAID1_H 2 #define _RAID1_H 3 4 /* 5 * each barrier unit size is 64MB fow now 6 * note: it must be larger than RESYNC_DEPTH 7 */ 8 #define BARRIER_UNIT_SECTOR_BITS 17 9 #define BARRIER_UNIT_SECTOR_SIZE (1<<17) 10 /* 11 * In struct r1conf, the following members are related to I/O barrier 12 * buckets, 13 * atomic_t *nr_pending; 14 * atomic_t *nr_waiting; 15 * atomic_t *nr_queued; 16 * atomic_t *barrier; 17 * Each of them points to array of atomic_t variables, each array is 18 * designed to have BARRIER_BUCKETS_NR elements and occupy a single 19 * memory page. The data width of atomic_t variables is 4 bytes, equal 20 * to 1<<(ilog2(sizeof(atomic_t))), BARRIER_BUCKETS_NR_BITS is defined 21 * as (PAGE_SHIFT - ilog2(sizeof(int))) to make sure an array of 22 * atomic_t variables with BARRIER_BUCKETS_NR elements just exactly 23 * occupies a single memory page. 24 */ 25 #define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t))) 26 #define BARRIER_BUCKETS_NR (1<<BARRIER_BUCKETS_NR_BITS) 27 28 struct raid1_info { 29 struct md_rdev *rdev; 30 sector_t head_position; 31 32 /* When choose the best device for a read (read_balance()) 33 * we try to keep sequential reads one the same device 34 */ 35 sector_t next_seq_sect; 36 sector_t seq_start; 37 }; 38 39 /* 40 * memory pools need a pointer to the mddev, so they can force an unplug 41 * when memory is tight, and a count of the number of drives that the 42 * pool was allocated for, so they know how much to allocate and free. 43 * mddev->raid_disks cannot be used, as it can change while a pool is active 44 * These two datums are stored in a kmalloced struct. 45 * The 'raid_disks' here is twice the raid_disks in r1conf. 46 * This allows space for each 'real' device can have a replacement in the 47 * second half of the array. 48 */ 49 50 struct pool_info { 51 struct mddev *mddev; 52 int raid_disks; 53 }; 54 55 struct r1conf { 56 struct mddev *mddev; 57 struct raid1_info *mirrors; /* twice 'raid_disks' to 58 * allow for replacements. 59 */ 60 int raid_disks; 61 62 spinlock_t device_lock; 63 64 /* list of 'struct r1bio' that need to be processed by raid1d, 65 * whether to retry a read, writeout a resync or recovery 66 * block, or anything else. 67 */ 68 struct list_head retry_list; 69 /* A separate list of r1bio which just need raid_end_bio_io called. 70 * This mustn't happen for writes which had any errors if the superblock 71 * needs to be written. 72 */ 73 struct list_head bio_end_io_list; 74 75 /* queue pending writes to be submitted on unplug */ 76 struct bio_list pending_bio_list; 77 int pending_count; 78 79 /* for use when syncing mirrors: 80 * We don't allow both normal IO and resync/recovery IO at 81 * the same time - resync/recovery can only happen when there 82 * is no other IO. So when either is active, the other has to wait. 83 * See more details description in raid1.c near raise_barrier(). 84 */ 85 wait_queue_head_t wait_barrier; 86 spinlock_t resync_lock; 87 atomic_t *nr_pending; 88 atomic_t *nr_waiting; 89 atomic_t *nr_queued; 90 atomic_t *barrier; 91 int array_frozen; 92 93 /* Set to 1 if a full sync is needed, (fresh device added). 94 * Cleared when a sync completes. 95 */ 96 int fullsync; 97 98 /* When the same as mddev->recovery_disabled we don't allow 99 * recovery to be attempted as we expect a read error. 100 */ 101 int recovery_disabled; 102 103 /* poolinfo contains information about the content of the 104 * mempools - it changes when the array grows or shrinks 105 */ 106 struct pool_info *poolinfo; 107 mempool_t *r1bio_pool; 108 mempool_t *r1buf_pool; 109 110 /* temporary buffer to synchronous IO when attempting to repair 111 * a read error. 112 */ 113 struct page *tmppage; 114 115 /* When taking over an array from a different personality, we store 116 * the new thread here until we fully activate the array. 117 */ 118 struct md_thread *thread; 119 120 /* Keep track of cluster resync window to send to other 121 * nodes. 122 */ 123 sector_t cluster_sync_low; 124 sector_t cluster_sync_high; 125 126 }; 127 128 /* 129 * this is our 'private' RAID1 bio. 130 * 131 * it contains information about what kind of IO operations were started 132 * for this RAID1 operation, and about their status: 133 */ 134 135 struct r1bio { 136 atomic_t remaining; /* 'have we finished' count, 137 * used from IRQ handlers 138 */ 139 atomic_t behind_remaining; /* number of write-behind ios remaining 140 * in this BehindIO request 141 */ 142 sector_t sector; 143 int sectors; 144 unsigned long state; 145 struct mddev *mddev; 146 /* 147 * original bio going to /dev/mdx 148 */ 149 struct bio *master_bio; 150 /* 151 * if the IO is in READ direction, then this is where we read 152 */ 153 int read_disk; 154 155 struct list_head retry_list; 156 /* Next two are only valid when R1BIO_BehindIO is set */ 157 struct bio_vec *behind_bvecs; 158 int behind_page_count; 159 /* 160 * if the IO is in WRITE direction, then multiple bios are used. 161 * We choose the number when they are allocated. 162 */ 163 struct bio *bios[0]; 164 /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ 165 }; 166 167 /* bits for r1bio.state */ 168 enum r1bio_state { 169 R1BIO_Uptodate, 170 R1BIO_IsSync, 171 R1BIO_Degraded, 172 R1BIO_BehindIO, 173 /* Set ReadError on bios that experience a readerror so that 174 * raid1d knows what to do with them. 175 */ 176 R1BIO_ReadError, 177 /* For write-behind requests, we call bi_end_io when 178 * the last non-write-behind device completes, providing 179 * any write was successful. Otherwise we call when 180 * any write-behind write succeeds, otherwise we call 181 * with failure when last write completes (and all failed). 182 * Record that bi_end_io was called with this flag... 183 */ 184 R1BIO_Returned, 185 /* If a write for this request means we can clear some 186 * known-bad-block records, we set this flag 187 */ 188 R1BIO_MadeGood, 189 R1BIO_WriteError, 190 R1BIO_FailFast, 191 }; 192 193 static inline int sector_to_idx(sector_t sector) 194 { 195 return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS, 196 BARRIER_BUCKETS_NR_BITS); 197 } 198 #endif 199