raid5.c — diff between commits 3bddb7f8f264ec58dc86e11ca97341c24f9d38f6 and 3a83f4677539bce8eaa2bca9ee9c20e172d7ab04
1/*
2 * raid5.c : Multiple Devices driver for Linux
3 * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
4 * Copyright (C) 1999, 2000 Ingo Molnar
5 * Copyright (C) 2002, 2003 H. Peter Anvin
6 *
7 * RAID-4/5/6 management functions.
8 * Thanks to Penguin Computing for making the RAID-6 development possible

--- 56 unchanged lines hidden (view full) ---

65#define cpu_to_group(cpu) cpu_to_node(cpu)
66#define ANY_GROUP NUMA_NO_NODE
67
68static bool devices_handle_discard_safely = false;
69module_param(devices_handle_discard_safely, bool, 0644);
70MODULE_PARM_DESC(devices_handle_discard_safely,
71 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
72static struct workqueue_struct *raid5_wq;
73/*
74 * Stripe cache
75 */
73
76
77#define NR_STRIPES 256
78#define STRIPE_SIZE PAGE_SIZE
79#define STRIPE_SHIFT (PAGE_SHIFT - 9)
80#define STRIPE_SECTORS (STRIPE_SIZE>>9)
81#define IO_THRESHOLD 1
82#define BYPASS_THRESHOLD 1
83#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
84#define HASH_MASK (NR_HASH - 1)
85#define MAX_STRIPE_BATCH 8
86
74static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
75{
76 int hash = (sect >> STRIPE_SHIFT) & HASH_MASK;
77 return &conf->stripe_hashtbl[hash];
78}
79
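For orientation, here is a minimal user-space sketch of the bucket calculation stripe_hash() performs above. The constants and names are invented for the example and assume 4 KiB pages with 8-byte hlist heads (so STRIPE_SHIFT is 3 and the table has 512 buckets); the real values come from the STRIPE_SHIFT/NR_HASH/HASH_MASK definitions shown earlier.

/* Editorial sketch, not kernel code. */
#include <stdio.h>

#define SKETCH_STRIPE_SHIFT 3           /* PAGE_SHIFT(12) - 9 */
#define SKETCH_HASH_MASK    (512 - 1)   /* NR_HASH - 1 */

int main(void)
{
    unsigned long long sect = 0x12345;  /* arbitrary example sector */
    unsigned int hash = (sect >> SKETCH_STRIPE_SHIFT) & SKETCH_HASH_MASK;

    printf("sector %#llx -> hash bucket %u\n", sect, hash); /* bucket 104 */
    return 0;
}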
80static inline int stripe_hash_locks_hash(sector_t sect)
81{

--- 26 unchanged lines hidden (view full) ---

108{
109 int i;
110 spin_unlock(&conf->device_lock);
111 for (i = NR_STRIPE_HASH_LOCKS; i; i--)
112 spin_unlock(conf->hash_locks + i - 1);
113 local_irq_enable();
114}
115
87static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
88{
89 int hash = (sect >> STRIPE_SHIFT) & HASH_MASK;
90 return &conf->stripe_hashtbl[hash];
91}
92
93static inline int stripe_hash_locks_hash(sector_t sect)
94{

--- 26 unchanged lines hidden (view full) ---

121{
122 int i;
123 spin_unlock(&conf->device_lock);
124 for (i = NR_STRIPE_HASH_LOCKS; i; i--)
125 spin_unlock(conf->hash_locks + i - 1);
126 local_irq_enable();
127}
128
129/* bio's attached to a stripe+device for I/O are linked together in bi_sector
130 * order without overlap. There may be several bio's per stripe+device, and
131 * a bio could span several devices.
132 * When walking this list for a particular stripe+device, we must never proceed
133 * beyond a bio that extends past this device, as the next bio might no longer
134 * be valid.
135 * This function is used to determine the 'next' bio in the list, given the sector
136 * of the current stripe+device
137 */
138static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
139{
140 int sectors = bio_sectors(bio);
141 if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
142 return bio->bi_next;
143 else
144 return NULL;
145}
146
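As a rough illustration of the rule r5_next_bio() encodes above (walk the chain only while the current bio ends inside this stripe+device's STRIPE_SECTORS window), here is a hedged user-space sketch; the simplified structure and names are invented for the example, and STRIPE_SECTORS is assumed to be 8 (4 KiB stripe, 512-byte sectors).

/* Editorial sketch, not kernel code. */
#include <stdio.h>
#include <stddef.h>

#define SKETCH_STRIPE_SECTORS 8

struct sketch_bio {
    unsigned long long sector;    /* first sector covered (bi_sector) */
    unsigned int nr_sectors;      /* length in sectors */
    struct sketch_bio *next;      /* bi_next */
};

/* Mirror of the r5_next_bio() test: stop once a bio extends past the
 * current stripe+device window. */
static struct sketch_bio *next_bio(struct sketch_bio *bio,
                                   unsigned long long dev_sector)
{
    if (bio->sector + bio->nr_sectors < dev_sector + SKETCH_STRIPE_SECTORS)
        return bio->next;
    return NULL;
}

int main(void)
{
    struct sketch_bio b2 = { 12, 16, NULL };   /* extends past the window */
    struct sketch_bio b1 = {  8,  4, &b2 };    /* ends inside the window */

    /* window for this stripe+device starts at sector 8 */
    printf("after b1: %s\n", next_bio(&b1, 8) ? "b2"   : "stop");
    printf("after b2: %s\n", next_bio(&b2, 8) ? "more" : "stop");
    return 0;
}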
147/*
148 * We maintain a biased count of active stripes in the bottom 16 bits of
149 * bi_phys_segments, and a count of processed stripes in the upper 16 bits
150 */
151static inline int raid5_bi_processed_stripes(struct bio *bio)
152{
153 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
154 return (atomic_read(segments) >> 16) & 0xffff;
155}
156
157static inline int raid5_dec_bi_active_stripes(struct bio *bio)
158{
159 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
160 return atomic_sub_return(1, segments) & 0xffff;
161}
162
163static inline void raid5_inc_bi_active_stripes(struct bio *bio)
164{
165 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
166 atomic_inc(segments);
167}
168
169static inline void raid5_set_bi_processed_stripes(struct bio *bio,
170 unsigned int cnt)
171{
172 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
173 int old, new;
174
175 do {
176 old = atomic_read(segments);
177 new = (old & 0xffff) | (cnt << 16);
178 } while (atomic_cmpxchg(segments, old, new) != old);
179}
180
181static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
182{
183 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
184 atomic_set(segments, cnt);
185}
186
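A hedged user-space sketch of the 16/16-bit packing the helpers above rely on, using a plain unsigned int in place of the atomic bi_phys_segments field; the function and variable names are invented for the example.

/* Editorial sketch, not kernel code: bottom 16 bits hold the active-stripe
 * count, top 16 bits hold the processed-stripe count. */
#include <stdio.h>

static unsigned int set_processed(unsigned int v, unsigned int cnt)
{
    return (v & 0xffff) | (cnt << 16);   /* like raid5_set_bi_processed_stripes() */
}

int main(void)
{
    unsigned int v = 0;

    v += 3;                   /* three raid5_inc_bi_active_stripes() calls */
    v = set_processed(v, 2);  /* record two processed stripes */

    printf("active    = %u\n", v & 0xffff);           /* 3 */
    printf("processed = %u\n", (v >> 16) & 0xffff);   /* 2 */
    v -= 1;                   /* raid5_dec_bi_active_stripes() */
    printf("active after dec = %u\n", v & 0xffff);    /* 2 */
    return 0;
}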
116/* Find first data disk in a raid6 stripe */
117static inline int raid6_d0(struct stripe_head *sh)
118{
119 if (sh->ddf_layout)
 120 /* ddf always starts from the first device */
121 return 0;
122 /* md starts just after Q block */
123 if (sh->qd_idx == sh->disks - 1)

--- 89 unchanged lines hidden (view full) ---

213 thread_cnt--;
214 }
215 }
216}
217
218static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
219 struct list_head *temp_inactive_list)
220{
187/* Find first data disk in a raid6 stripe */
188static inline int raid6_d0(struct stripe_head *sh)
189{
190 if (sh->ddf_layout)
 191 /* ddf always starts from the first device */
192 return 0;
193 /* md starts just after Q block */
194 if (sh->qd_idx == sh->disks - 1)

--- 89 unchanged lines hidden (view full) ---

284 thread_cnt--;
285 }
286 }
287}
288
289static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
290 struct list_head *temp_inactive_list)
291{
221 int i;
 222 int injournal = 0; /* number of data pages with R5_InJournal */
223
224 BUG_ON(!list_empty(&sh->lru));
225 BUG_ON(atomic_read(&conf->active_stripes)==0);
292 BUG_ON(!list_empty(&sh->lru));
293 BUG_ON(atomic_read(&conf->active_stripes)==0);
226
227 if (r5c_is_writeback(conf->log))
228 for (i = sh->disks; i--; )
229 if (test_bit(R5_InJournal, &sh->dev[i].flags))
230 injournal++;
231 /*
 232 * When quiescing in r5c write-back mode, set STRIPE_HANDLE for stripes with
 233 * data in the journal, so they are not released to the cached lists
234 */
235 if (conf->quiesce && r5c_is_writeback(conf->log) &&
236 !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
237 if (test_bit(STRIPE_R5C_CACHING, &sh->state))
238 r5c_make_stripe_write_out(sh);
239 set_bit(STRIPE_HANDLE, &sh->state);
240 }
241
242 if (test_bit(STRIPE_HANDLE, &sh->state)) {
243 if (test_bit(STRIPE_DELAYED, &sh->state) &&
244 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
245 list_add_tail(&sh->lru, &conf->delayed_list);
246 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
247 sh->bm_seq - conf->seq_write > 0)
248 list_add_tail(&sh->lru, &conf->bitmap_list);
249 else {

--- 9 unchanged lines hidden (view full) ---

259 md_wakeup_thread(conf->mddev->thread);
260 } else {
261 BUG_ON(stripe_operations_active(sh));
262 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
263 if (atomic_dec_return(&conf->preread_active_stripes)
264 < IO_THRESHOLD)
265 md_wakeup_thread(conf->mddev->thread);
266 atomic_dec(&conf->active_stripes);
294 if (test_bit(STRIPE_HANDLE, &sh->state)) {
295 if (test_bit(STRIPE_DELAYED, &sh->state) &&
296 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
297 list_add_tail(&sh->lru, &conf->delayed_list);
298 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
299 sh->bm_seq - conf->seq_write > 0)
300 list_add_tail(&sh->lru, &conf->bitmap_list);
301 else {

--- 9 unchanged lines hidden (view full) ---

311 md_wakeup_thread(conf->mddev->thread);
312 } else {
313 BUG_ON(stripe_operations_active(sh));
314 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
315 if (atomic_dec_return(&conf->preread_active_stripes)
316 < IO_THRESHOLD)
317 md_wakeup_thread(conf->mddev->thread);
318 atomic_dec(&conf->active_stripes);
267 if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
268 if (!r5c_is_writeback(conf->log))
269 list_add_tail(&sh->lru, temp_inactive_list);
270 else {
271 WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags));
272 if (injournal == 0)
273 list_add_tail(&sh->lru, temp_inactive_list);
274 else if (injournal == conf->raid_disks - conf->max_degraded) {
275 /* full stripe */
276 if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state))
277 atomic_inc(&conf->r5c_cached_full_stripes);
278 if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state))
279 atomic_dec(&conf->r5c_cached_partial_stripes);
280 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
281 r5c_check_cached_full_stripe(conf);
282 } else {
283 /* partial stripe */
284 if (!test_and_set_bit(STRIPE_R5C_PARTIAL_STRIPE,
285 &sh->state))
286 atomic_inc(&conf->r5c_cached_partial_stripes);
287 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
288 }
289 }
290 }
319 if (!test_bit(STRIPE_EXPANDING, &sh->state))
320 list_add_tail(&sh->lru, temp_inactive_list);
291 }
292}
293
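To make the full/partial classification above concrete, a small editorial sketch with invented numbers: in a 5-device RAID-5 (max_degraded = 1) each stripe has 4 data blocks, so a stripe moves to the full-stripe list only once all 4 of them are R5_InJournal.

/* Editorial sketch, not kernel code. */
#include <stdio.h>

int main(void)
{
    int raid_disks = 5, max_degraded = 1;
    int data_blocks = raid_disks - max_degraded;   /* 4 data blocks per stripe */
    int injournal;

    for (injournal = 0; injournal <= data_blocks; injournal++)
        printf("injournal=%d -> %s\n", injournal,
               injournal == 0           ? "inactive list" :
               injournal == data_blocks ? "r5c_full_stripe_list" :
                                          "r5c_partial_stripe_list");
    return 0;
}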
294static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
295 struct list_head *temp_inactive_list)
296{
297 if (atomic_dec_and_test(&sh->count))
298 do_release_stripe(conf, sh, temp_inactive_list);

--- 207 unchanged lines hidden (view full) ---

506 stripe_set_idx(sector, conf, previous, sh);
507 sh->state = 0;
508
509 for (i = sh->disks; i--; ) {
510 struct r5dev *dev = &sh->dev[i];
511
512 if (dev->toread || dev->read || dev->towrite || dev->written ||
513 test_bit(R5_LOCKED, &dev->flags)) {
321 }
322}
323
324static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
325 struct list_head *temp_inactive_list)
326{
327 if (atomic_dec_and_test(&sh->count))
328 do_release_stripe(conf, sh, temp_inactive_list);

--- 207 unchanged lines hidden (view full) ---

536 stripe_set_idx(sector, conf, previous, sh);
537 sh->state = 0;
538
539 for (i = sh->disks; i--; ) {
540 struct r5dev *dev = &sh->dev[i];
541
542 if (dev->toread || dev->read || dev->towrite || dev->written ||
543 test_bit(R5_LOCKED, &dev->flags)) {
514 pr_err("sector=%llx i=%d %p %p %p %p %d\n",
544 printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
515 (unsigned long long)sh->sector, i, dev->toread,
516 dev->read, dev->towrite, dev->written,
517 test_bit(R5_LOCKED, &dev->flags));
518 WARN_ON(1);
519 }
520 dev->flags = 0;
521 raid5_build_block(sh, i, previous);
522 }

--- 122 unchanged lines hidden (view full) ---

645 sh = get_free_stripe(conf, hash);
646 if (!sh && !test_bit(R5_DID_ALLOC,
647 &conf->cache_state))
648 set_bit(R5_ALLOC_MORE,
649 &conf->cache_state);
650 }
651 if (noblock && sh == NULL)
652 break;
545 (unsigned long long)sh->sector, i, dev->toread,
546 dev->read, dev->towrite, dev->written,
547 test_bit(R5_LOCKED, &dev->flags));
548 WARN_ON(1);
549 }
550 dev->flags = 0;
551 raid5_build_block(sh, i, previous);
552 }

--- 122 unchanged lines hidden (view full) ---

675 sh = get_free_stripe(conf, hash);
676 if (!sh && !test_bit(R5_DID_ALLOC,
677 &conf->cache_state))
678 set_bit(R5_ALLOC_MORE,
679 &conf->cache_state);
680 }
681 if (noblock && sh == NULL)
682 break;
653
654 r5c_check_stripe_cache_usage(conf);
655 if (!sh) {
656 set_bit(R5_INACTIVE_BLOCKED,
657 &conf->cache_state);
683 if (!sh) {
684 set_bit(R5_INACTIVE_BLOCKED,
685 &conf->cache_state);
658 r5l_wake_reclaim(conf->log, 0);
659 wait_event_lock_irq(
660 conf->wait_for_stripe,
661 !list_empty(conf->inactive_list + hash) &&
662 (atomic_read(&conf->active_stripes)
663 < (conf->max_nr_stripes * 3 / 4)
664 || !test_bit(R5_INACTIVE_BLOCKED,
665 &conf->cache_state)),
666 *(conf->hash_locks + hash));

--- 202 unchanged lines hidden (view full) ---

869static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
870{
871 struct r5conf *conf = sh->raid_conf;
872 int i, disks = sh->disks;
873 struct stripe_head *head_sh = sh;
874
875 might_sleep();
876
686 wait_event_lock_irq(
687 conf->wait_for_stripe,
688 !list_empty(conf->inactive_list + hash) &&
689 (atomic_read(&conf->active_stripes)
690 < (conf->max_nr_stripes * 3 / 4)
691 || !test_bit(R5_INACTIVE_BLOCKED,
692 &conf->cache_state)),
693 *(conf->hash_locks + hash));

--- 202 unchanged lines hidden (view full) ---

896static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
897{
898 struct r5conf *conf = sh->raid_conf;
899 int i, disks = sh->disks;
900 struct stripe_head *head_sh = sh;
901
902 might_sleep();
903
877 if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
878 /* writing out phase */
879 if (r5l_write_stripe(conf->log, sh) == 0)
880 return;
881 } else { /* caching phase */
882 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) {
883 r5c_cache_data(conf->log, sh, s);
884 return;
885 }
886 }
887
904 if (r5l_write_stripe(conf->log, sh) == 0)
905 return;
888 for (i = disks; i--; ) {
889 int op, op_flags = 0;
890 int replace_only = 0;
891 struct bio *bi, *rbi;
892 struct md_rdev *rdev, *rrdev = NULL;
893
894 sh = head_sh;
895 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
896 op = REQ_OP_WRITE;
897 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
906 for (i = disks; i--; ) {
907 int op, op_flags = 0;
908 int replace_only = 0;
909 struct bio *bi, *rbi;
910 struct md_rdev *rdev, *rrdev = NULL;
911
912 sh = head_sh;
913 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
914 op = REQ_OP_WRITE;
915 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
898 op_flags = WRITE_FUA;
916 op_flags = REQ_FUA;
899 if (test_bit(R5_Discard, &sh->dev[i].flags))
900 op = REQ_OP_DISCARD;
901 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
902 op = REQ_OP_READ;
903 else if (test_and_clear_bit(R5_WantReplace,
904 &sh->dev[i].flags)) {
905 op = REQ_OP_WRITE;
906 replace_only = 1;

--- 185 unchanged lines hidden (view full) ---

1092 if (sh != head_sh)
1093 goto again;
1094 }
1095}
1096
1097static struct dma_async_tx_descriptor *
1098async_copy_data(int frombio, struct bio *bio, struct page **page,
1099 sector_t sector, struct dma_async_tx_descriptor *tx,
917 if (test_bit(R5_Discard, &sh->dev[i].flags))
918 op = REQ_OP_DISCARD;
919 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
920 op = REQ_OP_READ;
921 else if (test_and_clear_bit(R5_WantReplace,
922 &sh->dev[i].flags)) {
923 op = REQ_OP_WRITE;
924 replace_only = 1;

--- 185 unchanged lines hidden (view full) ---

1110 if (sh != head_sh)
1111 goto again;
1112 }
1113}
1114
1115static struct dma_async_tx_descriptor *
1116async_copy_data(int frombio, struct bio *bio, struct page **page,
1117 sector_t sector, struct dma_async_tx_descriptor *tx,
1100 struct stripe_head *sh, int no_skipcopy)
1118 struct stripe_head *sh)
1101{
1102 struct bio_vec bvl;
1103 struct bvec_iter iter;
1104 struct page *bio_page;
1105 int page_offset;
1106 struct async_submit_ctl submit;
1107 enum async_tx_flags flags = 0;
1108

--- 23 unchanged lines hidden (view full) ---

1132 clen = len;
1133
1134 if (clen > 0) {
1135 b_offset += bvl.bv_offset;
1136 bio_page = bvl.bv_page;
1137 if (frombio) {
1138 if (sh->raid_conf->skip_copy &&
1139 b_offset == 0 && page_offset == 0 &&
1119{
1120 struct bio_vec bvl;
1121 struct bvec_iter iter;
1122 struct page *bio_page;
1123 int page_offset;
1124 struct async_submit_ctl submit;
1125 enum async_tx_flags flags = 0;
1126

--- 23 unchanged lines hidden (view full) ---

1150 clen = len;
1151
1152 if (clen > 0) {
1153 b_offset += bvl.bv_offset;
1154 bio_page = bvl.bv_page;
1155 if (frombio) {
1156 if (sh->raid_conf->skip_copy &&
1157 b_offset == 0 && page_offset == 0 &&
1140 clen == STRIPE_SIZE &&
1141 !no_skipcopy)
1158 clen == STRIPE_SIZE)
1142 *page = bio_page;
1143 else
1144 tx = async_memcpy(*page, bio_page, page_offset,
1145 b_offset, clen, &submit);
1146 } else
1147 tx = async_memcpy(bio_page, *page, b_offset,
1148 page_offset, clen, &submit);
1149 }

--- 65 unchanged lines hidden (view full) ---

1215 struct bio *rbi;
1216 spin_lock_irq(&sh->stripe_lock);
1217 dev->read = rbi = dev->toread;
1218 dev->toread = NULL;
1219 spin_unlock_irq(&sh->stripe_lock);
1220 while (rbi && rbi->bi_iter.bi_sector <
1221 dev->sector + STRIPE_SECTORS) {
1222 tx = async_copy_data(0, rbi, &dev->page,
1159 *page = bio_page;
1160 else
1161 tx = async_memcpy(*page, bio_page, page_offset,
1162 b_offset, clen, &submit);
1163 } else
1164 tx = async_memcpy(bio_page, *page, b_offset,
1165 page_offset, clen, &submit);
1166 }

--- 65 unchanged lines hidden (view full) ---

1232 struct bio *rbi;
1233 spin_lock_irq(&sh->stripe_lock);
1234 dev->read = rbi = dev->toread;
1235 dev->toread = NULL;
1236 spin_unlock_irq(&sh->stripe_lock);
1237 while (rbi && rbi->bi_iter.bi_sector <
1238 dev->sector + STRIPE_SECTORS) {
1239 tx = async_copy_data(0, rbi, &dev->page,
1223 dev->sector, tx, sh, 0);
1240 dev->sector, tx, sh);
1224 rbi = r5_next_bio(rbi, dev->sector);
1225 }
1226 }
1227 }
1228
1229 atomic_inc(&sh->count);
1230 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
1231 async_trigger_callback(&submit);

--- 110 unchanged lines hidden (view full) ---

1342 i = d0_idx;
1343 do {
1344 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1345 struct r5dev *dev = &sh->dev[i];
1346
1347 if (i == sh->qd_idx || i == sh->pd_idx ||
1348 (srctype == SYNDROME_SRC_ALL) ||
1349 (srctype == SYNDROME_SRC_WANT_DRAIN &&
1241 rbi = r5_next_bio(rbi, dev->sector);
1242 }
1243 }
1244 }
1245
1246 atomic_inc(&sh->count);
1247 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
1248 async_trigger_callback(&submit);

--- 110 unchanged lines hidden (view full) ---

1359 i = d0_idx;
1360 do {
1361 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1362 struct r5dev *dev = &sh->dev[i];
1363
1364 if (i == sh->qd_idx || i == sh->pd_idx ||
1365 (srctype == SYNDROME_SRC_ALL) ||
1366 (srctype == SYNDROME_SRC_WANT_DRAIN &&
1350 (test_bit(R5_Wantdrain, &dev->flags) ||
1351 test_bit(R5_InJournal, &dev->flags))) ||
1367 test_bit(R5_Wantdrain, &dev->flags)) ||
1352 (srctype == SYNDROME_SRC_WRITTEN &&
1368 (srctype == SYNDROME_SRC_WRITTEN &&
1353 dev->written)) {
1354 if (test_bit(R5_InJournal, &dev->flags))
1355 srcs[slot] = sh->dev[i].orig_page;
1356 else
1357 srcs[slot] = sh->dev[i].page;
1358 }
1369 dev->written))
1370 srcs[slot] = sh->dev[i].page;
1359 i = raid6_next_disk(i, disks);
1360 } while (i != d0_idx);
1361
1362 return syndrome_disks;
1363}
1364
1365static struct dma_async_tx_descriptor *
1366ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)

--- 162 unchanged lines hidden (view full) ---

1529}
1530
1531static void ops_complete_prexor(void *stripe_head_ref)
1532{
1533 struct stripe_head *sh = stripe_head_ref;
1534
1535 pr_debug("%s: stripe %llu\n", __func__,
1536 (unsigned long long)sh->sector);
1371 i = raid6_next_disk(i, disks);
1372 } while (i != d0_idx);
1373
1374 return syndrome_disks;
1375}
1376
1377static struct dma_async_tx_descriptor *
1378ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)

--- 162 unchanged lines hidden (view full) ---

1541}
1542
1543static void ops_complete_prexor(void *stripe_head_ref)
1544{
1545 struct stripe_head *sh = stripe_head_ref;
1546
1547 pr_debug("%s: stripe %llu\n", __func__,
1548 (unsigned long long)sh->sector);
1537
1538 if (r5c_is_writeback(sh->raid_conf->log))
1539 /*
1540 * raid5-cache write back uses orig_page during prexor.
1541 * After prexor, it is time to free orig_page
1542 */
1543 r5c_release_extra_page(sh);
1544}
1545
1546static struct dma_async_tx_descriptor *
1547ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
1548 struct dma_async_tx_descriptor *tx)
1549{
1550 int disks = sh->disks;
1551 struct page **xor_srcs = to_addr_page(percpu, 0);

--- 5 unchanged lines hidden (view full) ---

1557
1558 BUG_ON(sh->batch_head);
1559 pr_debug("%s: stripe %llu\n", __func__,
1560 (unsigned long long)sh->sector);
1561
1562 for (i = disks; i--; ) {
1563 struct r5dev *dev = &sh->dev[i];
1564 /* Only process blocks that are known to be uptodate */
1549}
1550
1551static struct dma_async_tx_descriptor *
1552ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
1553 struct dma_async_tx_descriptor *tx)
1554{
1555 int disks = sh->disks;
1556 struct page **xor_srcs = to_addr_page(percpu, 0);

--- 5 unchanged lines hidden (view full) ---

1562
1563 BUG_ON(sh->batch_head);
1564 pr_debug("%s: stripe %llu\n", __func__,
1565 (unsigned long long)sh->sector);
1566
1567 for (i = disks; i--; ) {
1568 struct r5dev *dev = &sh->dev[i];
1569 /* Only process blocks that are known to be uptodate */
1565 if (test_bit(R5_InJournal, &dev->flags))
1566 xor_srcs[count++] = dev->orig_page;
1567 else if (test_bit(R5_Wantdrain, &dev->flags))
1570 if (test_bit(R5_Wantdrain, &dev->flags))
1568 xor_srcs[count++] = dev->page;
1569 }
1570
1571 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
1572 ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
1573 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
1574
1575 return tx;

--- 17 unchanged lines hidden (view full) ---

1593 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
1594
1595 return tx;
1596}
1597
1598static struct dma_async_tx_descriptor *
1599ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1600{
1571 xor_srcs[count++] = dev->page;
1572 }
1573
1574 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
1575 ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
1576 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
1577
1578 return tx;

--- 17 unchanged lines hidden (view full) ---

1596 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
1597
1598 return tx;
1599}
1600
1601static struct dma_async_tx_descriptor *
1602ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1603{
1601 struct r5conf *conf = sh->raid_conf;
1602 int disks = sh->disks;
1603 int i;
1604 struct stripe_head *head_sh = sh;
1605
1606 pr_debug("%s: stripe %llu\n", __func__,
1607 (unsigned long long)sh->sector);
1608
1609 for (i = disks; i--; ) {
1610 struct r5dev *dev;
1611 struct bio *chosen;
1612
1613 sh = head_sh;
1614 if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) {
1615 struct bio *wbi;
1616
1617again:
1618 dev = &sh->dev[i];
1604 int disks = sh->disks;
1605 int i;
1606 struct stripe_head *head_sh = sh;
1607
1608 pr_debug("%s: stripe %llu\n", __func__,
1609 (unsigned long long)sh->sector);
1610
1611 for (i = disks; i--; ) {
1612 struct r5dev *dev;
1613 struct bio *chosen;
1614
1615 sh = head_sh;
1616 if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) {
1617 struct bio *wbi;
1618
1619again:
1620 dev = &sh->dev[i];
1619 /*
 1620 * clear R5_InJournal, so that when rewriting a page in the
 1621 * journal it is not skipped by r5l_log_stripe()
1622 */
1623 clear_bit(R5_InJournal, &dev->flags);
1624 spin_lock_irq(&sh->stripe_lock);
1625 chosen = dev->towrite;
1626 dev->towrite = NULL;
1627 sh->overwrite_disks = 0;
1628 BUG_ON(dev->written);
1629 wbi = dev->written = chosen;
1630 spin_unlock_irq(&sh->stripe_lock);
1631 WARN_ON(dev->page != dev->orig_page);
1632
1633 while (wbi && wbi->bi_iter.bi_sector <
1634 dev->sector + STRIPE_SECTORS) {
1635 if (wbi->bi_opf & REQ_FUA)
1636 set_bit(R5_WantFUA, &dev->flags);
1637 if (wbi->bi_opf & REQ_SYNC)
1638 set_bit(R5_SyncIO, &dev->flags);
1639 if (bio_op(wbi) == REQ_OP_DISCARD)
1640 set_bit(R5_Discard, &dev->flags);
1641 else {
1642 tx = async_copy_data(1, wbi, &dev->page,
1621 spin_lock_irq(&sh->stripe_lock);
1622 chosen = dev->towrite;
1623 dev->towrite = NULL;
1624 sh->overwrite_disks = 0;
1625 BUG_ON(dev->written);
1626 wbi = dev->written = chosen;
1627 spin_unlock_irq(&sh->stripe_lock);
1628 WARN_ON(dev->page != dev->orig_page);
1629
1630 while (wbi && wbi->bi_iter.bi_sector <
1631 dev->sector + STRIPE_SECTORS) {
1632 if (wbi->bi_opf & REQ_FUA)
1633 set_bit(R5_WantFUA, &dev->flags);
1634 if (wbi->bi_opf & REQ_SYNC)
1635 set_bit(R5_SyncIO, &dev->flags);
1636 if (bio_op(wbi) == REQ_OP_DISCARD)
1637 set_bit(R5_Discard, &dev->flags);
1638 else {
1639 tx = async_copy_data(1, wbi, &dev->page,
1643 dev->sector, tx, sh,
1644 r5c_is_writeback(conf->log));
1645 if (dev->page != dev->orig_page &&
1646 !r5c_is_writeback(conf->log)) {
1640 dev->sector, tx, sh);
1641 if (dev->page != dev->orig_page) {
1647 set_bit(R5_SkipCopy, &dev->flags);
1648 clear_bit(R5_UPTODATE, &dev->flags);
1649 clear_bit(R5_OVERWRITE, &dev->flags);
1650 }
1651 }
1652 wbi = r5_next_bio(wbi, dev->sector);
1653 }
1654

--- 91 unchanged lines hidden (view full) ---

1746 /* check if prexor is active which means only process blocks
1747 * that are part of a read-modify-write (written)
1748 */
1749 if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
1750 prexor = 1;
1751 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
1752 for (i = disks; i--; ) {
1753 struct r5dev *dev = &sh->dev[i];
1642 set_bit(R5_SkipCopy, &dev->flags);
1643 clear_bit(R5_UPTODATE, &dev->flags);
1644 clear_bit(R5_OVERWRITE, &dev->flags);
1645 }
1646 }
1647 wbi = r5_next_bio(wbi, dev->sector);
1648 }
1649

--- 91 unchanged lines hidden (view full) ---

1741 /* check if prexor is active which means only process blocks
1742 * that are part of a read-modify-write (written)
1743 */
1744 if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
1745 prexor = 1;
1746 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
1747 for (i = disks; i--; ) {
1748 struct r5dev *dev = &sh->dev[i];
1754 if (head_sh->dev[i].written ||
1755 test_bit(R5_InJournal, &head_sh->dev[i].flags))
1749 if (head_sh->dev[i].written)
1756 xor_srcs[count++] = dev->page;
1757 }
1758 } else {
1759 xor_dest = sh->dev[pd_idx].page;
1760 for (i = disks; i--; ) {
1761 struct r5dev *dev = &sh->dev[i];
1762 if (i != pd_idx)
1763 xor_srcs[count++] = dev->page;

--- 237 unchanged lines hidden (view full) ---

2001 int i;
2002
2003 sh = kmem_cache_zalloc(sc, gfp);
2004 if (sh) {
2005 spin_lock_init(&sh->stripe_lock);
2006 spin_lock_init(&sh->batch_lock);
2007 INIT_LIST_HEAD(&sh->batch_list);
2008 INIT_LIST_HEAD(&sh->lru);
1750 xor_srcs[count++] = dev->page;
1751 }
1752 } else {
1753 xor_dest = sh->dev[pd_idx].page;
1754 for (i = disks; i--; ) {
1755 struct r5dev *dev = &sh->dev[i];
1756 if (i != pd_idx)
1757 xor_srcs[count++] = dev->page;

--- 237 unchanged lines hidden (view full) ---

1995 int i;
1996
1997 sh = kmem_cache_zalloc(sc, gfp);
1998 if (sh) {
1999 spin_lock_init(&sh->stripe_lock);
2000 spin_lock_init(&sh->batch_lock);
2001 INIT_LIST_HEAD(&sh->batch_list);
2002 INIT_LIST_HEAD(&sh->lru);
2009 INIT_LIST_HEAD(&sh->r5c);
2010 atomic_set(&sh->count, 1);
2003 atomic_set(&sh->count, 1);
2011 sh->log_start = MaxSector;
2012 for (i = 0; i < disks; i++) {
2013 struct r5dev *dev = &sh->dev[i];
2014
2004 for (i = 0; i < disks; i++) {
2005 struct r5dev *dev = &sh->dev[i];
2006
2015 bio_init(&dev->req);
2016 dev->req.bi_io_vec = &dev->vec;
2017 dev->req.bi_max_vecs = 1;
2018
2019 bio_init(&dev->rreq);
2020 dev->rreq.bi_io_vec = &dev->rvec;
2021 dev->rreq.bi_max_vecs = 1;
2007 bio_init(&dev->req, &dev->vec, 1);
2008 bio_init(&dev->rreq, &dev->rvec, 1);
2022 }
2023 }
2024 return sh;
2025}
2026static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
2027{
2028 struct stripe_head *sh;
2029

--- 320 unchanged lines hidden (view full) ---

2350 s = sh->sector + rdev->data_offset;
2351 if (!bi->bi_error) {
2352 set_bit(R5_UPTODATE, &sh->dev[i].flags);
2353 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
2354 /* Note that this cannot happen on a
2355 * replacement device. We just fail those on
2356 * any error
2357 */
2009 }
2010 }
2011 return sh;
2012}
2013static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
2014{
2015 struct stripe_head *sh;
2016

--- 320 unchanged lines hidden (view full) ---

2337 s = sh->sector + rdev->data_offset;
2338 if (!bi->bi_error) {
2339 set_bit(R5_UPTODATE, &sh->dev[i].flags);
2340 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
2341 /* Note that this cannot happen on a
2342 * replacement device. We just fail those on
2343 * any error
2344 */
2358 pr_info_ratelimited(
2359 "md/raid:%s: read error corrected (%lu sectors at %llu on %s)\n",
2345 printk_ratelimited(
2346 KERN_INFO
2347 "md/raid:%s: read error corrected"
2348 " (%lu sectors at %llu on %s)\n",
2360 mdname(conf->mddev), STRIPE_SECTORS,
2361 (unsigned long long)s,
2362 bdevname(rdev->bdev, b));
2363 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
2364 clear_bit(R5_ReadError, &sh->dev[i].flags);
2365 clear_bit(R5_ReWrite, &sh->dev[i].flags);
2366 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2367 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
2368
2369 if (atomic_read(&rdev->read_errors))
2370 atomic_set(&rdev->read_errors, 0);
2371 } else {
2372 const char *bdn = bdevname(rdev->bdev, b);
2373 int retry = 0;
2374 int set_bad = 0;
2375
2376 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
2377 atomic_inc(&rdev->read_errors);
2378 if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
2349 mdname(conf->mddev), STRIPE_SECTORS,
2350 (unsigned long long)s,
2351 bdevname(rdev->bdev, b));
2352 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
2353 clear_bit(R5_ReadError, &sh->dev[i].flags);
2354 clear_bit(R5_ReWrite, &sh->dev[i].flags);
2355 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2356 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
2357
2358 if (atomic_read(&rdev->read_errors))
2359 atomic_set(&rdev->read_errors, 0);
2360 } else {
2361 const char *bdn = bdevname(rdev->bdev, b);
2362 int retry = 0;
2363 int set_bad = 0;
2364
2365 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
2366 atomic_inc(&rdev->read_errors);
2367 if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
2379 pr_warn_ratelimited(
2380 "md/raid:%s: read error on replacement device (sector %llu on %s).\n",
2368 printk_ratelimited(
2369 KERN_WARNING
2370 "md/raid:%s: read error on replacement device "
2371 "(sector %llu on %s).\n",
2381 mdname(conf->mddev),
2382 (unsigned long long)s,
2383 bdn);
2384 else if (conf->mddev->degraded >= conf->max_degraded) {
2385 set_bad = 1;
2372 mdname(conf->mddev),
2373 (unsigned long long)s,
2374 bdn);
2375 else if (conf->mddev->degraded >= conf->max_degraded) {
2376 set_bad = 1;
2386 pr_warn_ratelimited(
2387 "md/raid:%s: read error not correctable (sector %llu on %s).\n",
2377 printk_ratelimited(
2378 KERN_WARNING
2379 "md/raid:%s: read error not correctable "
2380 "(sector %llu on %s).\n",
2388 mdname(conf->mddev),
2389 (unsigned long long)s,
2390 bdn);
2391 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
2392 /* Oh, no!!! */
2393 set_bad = 1;
2381 mdname(conf->mddev),
2382 (unsigned long long)s,
2383 bdn);
2384 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
2385 /* Oh, no!!! */
2386 set_bad = 1;
2394 pr_warn_ratelimited(
2395 "md/raid:%s: read error NOT corrected!! (sector %llu on %s).\n",
2387 printk_ratelimited(
2388 KERN_WARNING
2389 "md/raid:%s: read error NOT corrected!! "
2390 "(sector %llu on %s).\n",
2396 mdname(conf->mddev),
2397 (unsigned long long)s,
2398 bdn);
2399 } else if (atomic_read(&rdev->read_errors)
2400 > conf->max_nr_stripes)
2391 mdname(conf->mddev),
2392 (unsigned long long)s,
2393 bdn);
2394 } else if (atomic_read(&rdev->read_errors)
2395 > conf->max_nr_stripes)
2401 pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
2396 printk(KERN_WARNING
2397 "md/raid:%s: Too many read errors, failing device %s.\n",
2402 mdname(conf->mddev), bdn);
2403 else
2404 retry = 1;
2405 if (set_bad && test_bit(In_sync, &rdev->flags)
2406 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2407 retry = 1;
2408 if (retry)
2409 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {

--- 117 unchanged lines hidden (view full) ---

2527 mddev->degraded = calc_degraded(conf);
2528 spin_unlock_irqrestore(&conf->device_lock, flags);
2529 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2530
2531 set_bit(Blocked, &rdev->flags);
2532 set_bit(Faulty, &rdev->flags);
2533 set_mask_bits(&mddev->flags, 0,
2534 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
2398 mdname(conf->mddev), bdn);
2399 else
2400 retry = 1;
2401 if (set_bad && test_bit(In_sync, &rdev->flags)
2402 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2403 retry = 1;
2404 if (retry)
2405 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {

--- 117 unchanged lines hidden (view full) ---

2523 mddev->degraded = calc_degraded(conf);
2524 spin_unlock_irqrestore(&conf->device_lock, flags);
2525 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2526
2527 set_bit(Blocked, &rdev->flags);
2528 set_bit(Faulty, &rdev->flags);
2529 set_mask_bits(&mddev->flags, 0,
2530 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
2535 pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
2536 "md/raid:%s: Operation continuing on %d devices.\n",
2537 mdname(mddev),
2538 bdevname(rdev->bdev, b),
2539 mdname(mddev),
2540 conf->raid_disks - mddev->degraded);
2531 printk(KERN_ALERT
2532 "md/raid:%s: Disk failure on %s, disabling device.\n"
2533 "md/raid:%s: Operation continuing on %d devices.\n",
2534 mdname(mddev),
2535 bdevname(rdev->bdev, b),
2536 mdname(mddev),
2537 conf->raid_disks - mddev->degraded);
2541}
2542
2543/*
2544 * Input: a 'big' sector number,
2545 * Output: index of the data and parity disk, and the sector # in them.
2546 */
2547sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
2548 int previous, int *dd_idx,

--- 305 unchanged lines hidden (view full) ---

2854
2855 chunk_number = stripe * data_disks + i;
2856 r_sector = chunk_number * sectors_per_chunk + chunk_offset;
2857
2858 check = raid5_compute_sector(conf, r_sector,
2859 previous, &dummy1, &sh2);
2860 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
2861 || sh2.qd_idx != sh->qd_idx) {
2538}
2539
2540/*
2541 * Input: a 'big' sector number,
2542 * Output: index of the data and parity disk, and the sector # in them.
2543 */
2544sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
2545 int previous, int *dd_idx,

--- 305 unchanged lines hidden (view full) ---

2851
2852 chunk_number = stripe * data_disks + i;
2853 r_sector = chunk_number * sectors_per_chunk + chunk_offset;
2854
2855 check = raid5_compute_sector(conf, r_sector,
2856 previous, &dummy1, &sh2);
2857 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
2858 || sh2.qd_idx != sh->qd_idx) {
2862 pr_warn("md/raid:%s: compute_blocknr: map not correct\n",
2863 mdname(conf->mddev));
2859 printk(KERN_ERR "md/raid:%s: compute_blocknr: map not correct\n",
2860 mdname(conf->mddev));
2864 return 0;
2865 }
2866 return r_sector;
2867}
2868
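For orientation, a hedged user-space sketch of the forward mapping that raid5_compute_sector() performs and that the consistency check above inverts: an array-relative sector is split into (stripe, data-disk index, offset) with parity rotating across devices. The disk count, chunk size and parity-rotation rule here are invented for the example; the real code supports many layouts and both RAID-5 and RAID-6.

/* Editorial sketch, not the exact md algorithm. */
#include <stdio.h>

int main(void)
{
    const unsigned int raid_disks = 5, data_disks = 4;
    const unsigned int sectors_per_chunk = 64;
    unsigned long long r_sector = 1000;             /* array-relative sector */

    unsigned long long chunk_number = r_sector / sectors_per_chunk;
    unsigned int chunk_offset = r_sector % sectors_per_chunk;
    unsigned long long stripe = chunk_number / data_disks;
    unsigned int i = chunk_number % data_disks;     /* data chunk within stripe */

    /* simple rotating parity, chosen only for illustration */
    unsigned int pd_idx = (raid_disks - 1) - (unsigned int)(stripe % raid_disks);
    unsigned int dd_idx = (pd_idx + 1 + i) % raid_disks;
    unsigned long long new_sector = stripe * sectors_per_chunk + chunk_offset;

    /* inverse, as in the check above: chunk_number = stripe * data_disks + i */
    printf("r_sector %llu -> stripe %llu, dd_idx %u, pd_idx %u, dev sector %llu\n",
           r_sector, stripe, dd_idx, pd_idx, new_sector);
    return 0;
}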
2869static void
2870schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2871 int rcw, int expand)
2872{
2873 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
2874 struct r5conf *conf = sh->raid_conf;
2875 int level = conf->level;
2876
2877 if (rcw) {
2861 return 0;
2862 }
2863 return r_sector;
2864}
2865
2866static void
2867schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2868 int rcw, int expand)
2869{
2870 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
2871 struct r5conf *conf = sh->raid_conf;
2872 int level = conf->level;
2873
2874 if (rcw) {
2878 /*
 2879 * In some cases, handle_stripe_dirtying initially decides to
 2880 * run rmw and allocates an extra page for prexor. However, rcw turns
 2881 * out to be cheaper later on. We need to free the extra page now,
2882 * because we won't be able to do that in ops_complete_prexor().
2883 */
2884 r5c_release_extra_page(sh);
2885
2886 for (i = disks; i--; ) {
2887 struct r5dev *dev = &sh->dev[i];
2888
2889 if (dev->towrite) {
2890 set_bit(R5_LOCKED, &dev->flags);
2891 set_bit(R5_Wantdrain, &dev->flags);
2892 if (!expand)
2893 clear_bit(R5_UPTODATE, &dev->flags);
2894 s->locked++;
2875
2876 for (i = disks; i--; ) {
2877 struct r5dev *dev = &sh->dev[i];
2878
2879 if (dev->towrite) {
2880 set_bit(R5_LOCKED, &dev->flags);
2881 set_bit(R5_Wantdrain, &dev->flags);
2882 if (!expand)
2883 clear_bit(R5_UPTODATE, &dev->flags);
2884 s->locked++;
2895 } else if (test_bit(R5_InJournal, &dev->flags)) {
2896 set_bit(R5_LOCKED, &dev->flags);
2897 s->locked++;
2898 }
2899 }
2900 /* if we are not expanding this is a proper write request, and
2901 * there will be bios with new data to be drained into the
2902 * stripe cache
2903 */
2904 if (!expand) {
2905 if (!s->locked)

--- 23 unchanged lines hidden (view full) ---

2929
2930 if (dev->towrite &&
2931 (test_bit(R5_UPTODATE, &dev->flags) ||
2932 test_bit(R5_Wantcompute, &dev->flags))) {
2933 set_bit(R5_Wantdrain, &dev->flags);
2934 set_bit(R5_LOCKED, &dev->flags);
2935 clear_bit(R5_UPTODATE, &dev->flags);
2936 s->locked++;
2885 }
2886 }
2887 /* if we are not expanding this is a proper write request, and
2888 * there will be bios with new data to be drained into the
2889 * stripe cache
2890 */
2891 if (!expand) {
2892 if (!s->locked)

--- 23 unchanged lines hidden (view full) ---

2916
2917 if (dev->towrite &&
2918 (test_bit(R5_UPTODATE, &dev->flags) ||
2919 test_bit(R5_Wantcompute, &dev->flags))) {
2920 set_bit(R5_Wantdrain, &dev->flags);
2921 set_bit(R5_LOCKED, &dev->flags);
2922 clear_bit(R5_UPTODATE, &dev->flags);
2923 s->locked++;
2937 } else if (test_bit(R5_InJournal, &dev->flags)) {
2938 set_bit(R5_LOCKED, &dev->flags);
2939 s->locked++;
2940 }
2941 }
2942 if (!s->locked)
2943 /* False alarm - nothing to do */
2944 return;
2945 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
2946 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
2947 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);

--- 655 unchanged lines hidden (view full) ---

3603 */
3604 rcw = 1; rmw = 2;
3605 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
3606 conf->rmw_level, (unsigned long long)recovery_cp,
3607 (unsigned long long)sh->sector);
3608 } else for (i = disks; i--; ) {
3609 /* would I have to read this buffer for read_modify_write */
3610 struct r5dev *dev = &sh->dev[i];
2924 }
2925 }
2926 if (!s->locked)
2927 /* False alarm - nothing to do */
2928 return;
2929 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
2930 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
2931 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);

--- 655 unchanged lines hidden (view full) ---

3587 */
3588 rcw = 1; rmw = 2;
3589 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
3590 conf->rmw_level, (unsigned long long)recovery_cp,
3591 (unsigned long long)sh->sector);
3592 } else for (i = disks; i--; ) {
3593 /* would I have to read this buffer for read_modify_write */
3594 struct r5dev *dev = &sh->dev[i];
3611 if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
3612 test_bit(R5_InJournal, &dev->flags)) &&
3595 if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
3613 !test_bit(R5_LOCKED, &dev->flags) &&
3596 !test_bit(R5_LOCKED, &dev->flags) &&
3614 !((test_bit(R5_UPTODATE, &dev->flags) &&
3615 (!test_bit(R5_InJournal, &dev->flags) ||
3616 dev->page != dev->orig_page)) ||
3597 !(test_bit(R5_UPTODATE, &dev->flags) ||
3617 test_bit(R5_Wantcompute, &dev->flags))) {
3618 if (test_bit(R5_Insync, &dev->flags))
3619 rmw++;
3620 else
3621 rmw += 2*disks; /* cannot read it */
3622 }
3623 /* Would I have to read this buffer for reconstruct_write */
3624 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
3625 i != sh->pd_idx && i != sh->qd_idx &&
3626 !test_bit(R5_LOCKED, &dev->flags) &&
3627 !(test_bit(R5_UPTODATE, &dev->flags) ||
3598 test_bit(R5_Wantcompute, &dev->flags))) {
3599 if (test_bit(R5_Insync, &dev->flags))
3600 rmw++;
3601 else
3602 rmw += 2*disks; /* cannot read it */
3603 }
3604 /* Would I have to read this buffer for reconstruct_write */
3605 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
3606 i != sh->pd_idx && i != sh->qd_idx &&
3607 !test_bit(R5_LOCKED, &dev->flags) &&
3608 !(test_bit(R5_UPTODATE, &dev->flags) ||
3628 test_bit(R5_InJournal, &dev->flags) ||
3629 test_bit(R5_Wantcompute, &dev->flags))) {
3609 test_bit(R5_Wantcompute, &dev->flags))) {
3630 if (test_bit(R5_Insync, &dev->flags))
3631 rcw++;
3632 else
3633 rcw += 2*disks;
3634 }
3635 }
3610 if (test_bit(R5_Insync, &dev->flags))
3611 rcw++;
3612 else
3613 rcw += 2*disks;
3614 }
3615 }
3636
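As a rough worked example of the rmw/rcw cost comparison counted above and applied just below (illustrative only; the real counters also penalise blocks that cannot be read by 2*disks): for a 5-disk RAID-5 stripe with a single data block being rewritten, read-modify-write needs the old data block plus the old parity, while reconstruct-write needs the three untouched data blocks.

/* Editorial sketch, not kernel code. */
#include <stdio.h>

int main(void)
{
    int disks = 5, blocks_written = 1;
    int rmw = blocks_written + 1;            /* old data block(s) + old parity */
    int rcw = (disks - 1) - blocks_written;  /* untouched data blocks */

    printf("rmw=%d rcw=%d -> prefer %s\n", rmw, rcw, rmw < rcw ? "rmw" : "rcw");
    return 0;
}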
3637 pr_debug("for sector %llu, rmw=%d rcw=%d\n",
3638 (unsigned long long)sh->sector, rmw, rcw);
3639 set_bit(STRIPE_HANDLE, &sh->state);
3640 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
3641 /* prefer read-modify-write, but need to get some data */
3642 if (conf->mddev->queue)
3643 blk_add_trace_msg(conf->mddev->queue,
3644 "raid5 rmw %llu %d",
3645 (unsigned long long)sh->sector, rmw);
3646 for (i = disks; i--; ) {
3647 struct r5dev *dev = &sh->dev[i];
3616 pr_debug("for sector %llu, rmw=%d rcw=%d\n",
3617 (unsigned long long)sh->sector, rmw, rcw);
3618 set_bit(STRIPE_HANDLE, &sh->state);
3619 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
3620 /* prefer read-modify-write, but need to get some data */
3621 if (conf->mddev->queue)
3622 blk_add_trace_msg(conf->mddev->queue,
3623 "raid5 rmw %llu %d",
3624 (unsigned long long)sh->sector, rmw);
3625 for (i = disks; i--; ) {
3626 struct r5dev *dev = &sh->dev[i];
3648 if (test_bit(R5_InJournal, &dev->flags) &&
3649 dev->page == dev->orig_page &&
3650 !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
3651 /* alloc page for prexor */
3652 dev->orig_page = alloc_page(GFP_NOIO);
3653
 3654 /* will handle failure in a later patch */
3655 BUG_ON(!dev->orig_page);
3656 }
3657
3658 if ((dev->towrite ||
3659 i == sh->pd_idx || i == sh->qd_idx ||
3660 test_bit(R5_InJournal, &dev->flags)) &&
3627 if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
3661 !test_bit(R5_LOCKED, &dev->flags) &&
3628 !test_bit(R5_LOCKED, &dev->flags) &&
3662 !((test_bit(R5_UPTODATE, &dev->flags) &&
3663 (!test_bit(R5_InJournal, &dev->flags) ||
3664 dev->page != dev->orig_page)) ||
3665 test_bit(R5_Wantcompute, &dev->flags)) &&
3629 !(test_bit(R5_UPTODATE, &dev->flags) ||
3630 test_bit(R5_Wantcompute, &dev->flags)) &&
3666 test_bit(R5_Insync, &dev->flags)) {
3667 if (test_bit(STRIPE_PREREAD_ACTIVE,
3668 &sh->state)) {
3669 pr_debug("Read_old block %d for r-m-w\n",
3670 i);
3671 set_bit(R5_LOCKED, &dev->flags);
3672 set_bit(R5_Wantread, &dev->flags);
3673 s->locked++;

--- 9 unchanged lines hidden (view full) ---

3683 int qread =0;
3684 rcw = 0;
3685 for (i = disks; i--; ) {
3686 struct r5dev *dev = &sh->dev[i];
3687 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
3688 i != sh->pd_idx && i != sh->qd_idx &&
3689 !test_bit(R5_LOCKED, &dev->flags) &&
3690 !(test_bit(R5_UPTODATE, &dev->flags) ||
3631 test_bit(R5_Insync, &dev->flags)) {
3632 if (test_bit(STRIPE_PREREAD_ACTIVE,
3633 &sh->state)) {
3634 pr_debug("Read_old block %d for r-m-w\n",
3635 i);
3636 set_bit(R5_LOCKED, &dev->flags);
3637 set_bit(R5_Wantread, &dev->flags);
3638 s->locked++;

--- 9 unchanged lines hidden (view full) ---

3648 int qread =0;
3649 rcw = 0;
3650 for (i = disks; i--; ) {
3651 struct r5dev *dev = &sh->dev[i];
3652 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
3653 i != sh->pd_idx && i != sh->qd_idx &&
3654 !test_bit(R5_LOCKED, &dev->flags) &&
3655 !(test_bit(R5_UPTODATE, &dev->flags) ||
3691 test_bit(R5_InJournal, &dev->flags) ||
3692 test_bit(R5_Wantcompute, &dev->flags))) {
3693 rcw++;
3694 if (test_bit(R5_Insync, &dev->flags) &&
3695 test_bit(STRIPE_PREREAD_ACTIVE,
3696 &sh->state)) {
3697 pr_debug("Read_old block "
3698 "%d for Reconstruct\n", i);
3699 set_bit(R5_LOCKED, &dev->flags);

--- 23 unchanged lines hidden (view full) ---

3723 * case where a compute block operation has been submitted and then a
3724 * subsequent call wants to start a write request. raid_run_ops only
3725 * handles the case where compute block and reconstruct are requested
3726 * simultaneously. If this is not the case then new writes need to be
3727 * held off until the compute completes.
3728 */
3729 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
3730 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
3656 test_bit(R5_Wantcompute, &dev->flags))) {
3657 rcw++;
3658 if (test_bit(R5_Insync, &dev->flags) &&
3659 test_bit(STRIPE_PREREAD_ACTIVE,
3660 &sh->state)) {
3661 pr_debug("Read_old block "
3662 "%d for Reconstruct\n", i);
3663 set_bit(R5_LOCKED, &dev->flags);

--- 23 unchanged lines hidden (view full) ---

3687 * case where a compute block operation has been submitted and then a
3688 * subsequent call wants to start a write request. raid_run_ops only
3689 * handles the case where compute block and reconstruct are requested
3690 * simultaneously. If this is not the case then new writes need to be
3691 * held off until the compute completes.
3692 */
3693 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
3694 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
3731 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
3695 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
3732 schedule_reconstruction(sh, s, rcw == 0, 0);
3733}
3734
3735static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
3736 struct stripe_head_state *s, int disks)
3737{
3738 struct r5dev *dev = NULL;
3739

--- 68 unchanged lines hidden (view full) ---

3808 sh->ops.target2 = -1;
3809 s->uptodate++;
3810 }
3811 }
3812 break;
3813 case check_state_compute_run:
3814 break;
3815 default:
3696 schedule_reconstruction(sh, s, rcw == 0, 0);
3697}
3698
3699static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
3700 struct stripe_head_state *s, int disks)
3701{
3702 struct r5dev *dev = NULL;
3703

--- 68 unchanged lines hidden (view full) ---

3772 sh->ops.target2 = -1;
3773 s->uptodate++;
3774 }
3775 }
3776 break;
3777 case check_state_compute_run:
3778 break;
3779 default:
3816 pr_err("%s: unknown check_state: %d sector: %llu\n",
3780 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
3817 __func__, sh->check_state,
3818 (unsigned long long) sh->sector);
3819 BUG();
3820 }
3821}
3822
3823static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
3824 struct stripe_head_state *s,

--- 147 unchanged lines hidden (view full) ---

3972 s->uptodate++;
3973 }
3974 }
3975 }
3976 break;
3977 case check_state_compute_run:
3978 break;
3979 default:
3781 __func__, sh->check_state,
3782 (unsigned long long) sh->sector);
3783 BUG();
3784 }
3785}
3786
3787static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
3788 struct stripe_head_state *s,

--- 147 unchanged lines hidden (view full) ---

3936 s->uptodate++;
3937 }
3938 }
3939 }
3940 break;
3941 case check_state_compute_run:
3942 break;
3943 default:
3980 pr_warn("%s: unknown check_state: %d sector: %llu\n",
3981 __func__, sh->check_state,
3982 (unsigned long long) sh->sector);
3944 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
3945 __func__, sh->check_state,
3946 (unsigned long long) sh->sector);
3983 BUG();
3984 }
3985}
3986
3987static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
3988{
3989 int i;
3990

--- 223 unchanged lines hidden (view full) ---

4214 clear_bit(R5_Insync, &dev->flags);
4215 if (!test_bit(R5_Insync, &dev->flags)) {
4216 if (s->failed < 2)
4217 s->failed_num[s->failed] = i;
4218 s->failed++;
4219 if (rdev && !test_bit(Faulty, &rdev->flags))
4220 do_recovery = 1;
4221 }
3947 BUG();
3948 }
3949}
3950
3951static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
3952{
3953 int i;
3954

--- 223 unchanged lines hidden (view full) ---

4178 clear_bit(R5_Insync, &dev->flags);
4179 if (!test_bit(R5_Insync, &dev->flags)) {
4180 if (s->failed < 2)
4181 s->failed_num[s->failed] = i;
4182 s->failed++;
4183 if (rdev && !test_bit(Faulty, &rdev->flags))
4184 do_recovery = 1;
4185 }
4222
4223 if (test_bit(R5_InJournal, &dev->flags))
4224 s->injournal++;
4225 if (test_bit(R5_InJournal, &dev->flags) && dev->written)
4226 s->just_cached++;
4227 }
4228 if (test_bit(STRIPE_SYNCING, &sh->state)) {
4229 /* If there is a failed device being replaced,
4230 * we must be recovering.
4231 * else if we are after recovery_cp, we must be syncing
4232 * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
4233 * else we can only be replacing
4234 * sync and recovery both need to read all devices, and so

--- 212 unchanged lines hidden (view full) ---

4447 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
4448 BUG_ON(sh->qd_idx >= 0 &&
4449 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
4450 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
4451 for (i = disks; i--; ) {
4452 struct r5dev *dev = &sh->dev[i];
4453 if (test_bit(R5_LOCKED, &dev->flags) &&
4454 (i == sh->pd_idx || i == sh->qd_idx ||
4186 }
4187 if (test_bit(STRIPE_SYNCING, &sh->state)) {
4188 /* If there is a failed device being replaced,
4189 * we must be recovering.
4190 * else if we are after recovery_cp, we must be syncing
4191 * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
4192 * else we can only be replacing
4193 * sync and recovery both need to read all devices, and so

--- 212 unchanged lines hidden (view full) ---

4406 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
4407 BUG_ON(sh->qd_idx >= 0 &&
4408 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
4409 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
4410 for (i = disks; i--; ) {
4411 struct r5dev *dev = &sh->dev[i];
4412 if (test_bit(R5_LOCKED, &dev->flags) &&
4413 (i == sh->pd_idx || i == sh->qd_idx ||
4455 dev->written || test_bit(R5_InJournal,
4456 &dev->flags))) {
4414 dev->written)) {
4457 pr_debug("Writing block %d\n", i);
4458 set_bit(R5_Wantwrite, &dev->flags);
4459 if (prexor)
4460 continue;
4461 if (s.failed > 1)
4462 continue;
4463 if (!test_bit(R5_Insync, &dev->flags) ||
4464 ((i == sh->pd_idx || i == sh->qd_idx) &&

--- 23 unchanged lines hidden (view full) ---

4488 && (test_bit(R5_UPTODATE, &pdev->flags) ||
4489 test_bit(R5_Discard, &pdev->flags))))) &&
4490 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
4491 && !test_bit(R5_LOCKED, &qdev->flags)
4492 && (test_bit(R5_UPTODATE, &qdev->flags) ||
4493 test_bit(R5_Discard, &qdev->flags))))))
4494 handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
4495
4415 pr_debug("Writing block %d\n", i);
4416 set_bit(R5_Wantwrite, &dev->flags);
4417 if (prexor)
4418 continue;
4419 if (s.failed > 1)
4420 continue;
4421 if (!test_bit(R5_Insync, &dev->flags) ||
4422 ((i == sh->pd_idx || i == sh->qd_idx) &&

--- 23 unchanged lines hidden (view full) ---

4446 && (test_bit(R5_UPTODATE, &pdev->flags) ||
4447 test_bit(R5_Discard, &pdev->flags))))) &&
4448 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
4449 && !test_bit(R5_LOCKED, &qdev->flags)
4450 && (test_bit(R5_UPTODATE, &qdev->flags) ||
4451 test_bit(R5_Discard, &qdev->flags))))))
4452 handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
4453
4496 if (s.just_cached)
4497 r5c_handle_cached_data_endio(conf, sh, disks, &s.return_bi);
4498 r5l_stripe_write_finished(sh);
4499
4500 /* Now we might consider reading some blocks, either to check/generate
4501 * parity, or to satisfy requests
4502 * or to load a block that is being partially written.
4503 */
4504 if (s.to_read || s.non_overwrite
4505 || (conf->level == 6 && s.to_write && s.failed)
4506 || (s.syncing && (s.uptodate + s.compute < disks))
4507 || s.replacing
4508 || s.expanding)
4509 handle_stripe_fill(sh, &s, disks);
4510
4454 /* Now we might consider reading some blocks, either to check/generate
4455 * parity, or to satisfy requests
4456 * or to load a block that is being partially written.
4457 */
4458 if (s.to_read || s.non_overwrite
4459 || (conf->level == 6 && s.to_write && s.failed)
4460 || (s.syncing && (s.uptodate + s.compute < disks))
4461 || s.replacing
4462 || s.expanding)
4463 handle_stripe_fill(sh, &s, disks);
4464
4511 /*
4512 * When the stripe finishes full journal write cycle (write to journal
4513 * and raid disk), this is the clean up procedure so it is ready for
4514 * next operation.
4515 */
4516 r5c_finish_stripe_write_out(conf, sh, &s);
4517
4518 /*
4519 * Now to consider new write requests, cache write back and what else,
4520 * if anything should be read. We do not handle new writes when:
4465 /* Now to consider new write requests and what else, if anything
4466 * should be read. We do not handle new writes when:
4521 * 1/ A 'write' operation (copy+xor) is already in flight.
4522 * 2/ A 'check' operation is in flight, as it may clobber the parity
4523 * block.
4467 * 1/ A 'write' operation (copy+xor) is already in flight.
4468 * 2/ A 'check' operation is in flight, as it may clobber the parity
4469 * block.
4524 * 3/ A r5c cache log write is in flight.
4525 */
4470 */
4471 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
4472 handle_stripe_dirtying(conf, sh, &s, disks);
4526
4473
4527 if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) {
4528 if (!r5c_is_writeback(conf->log)) {
4529 if (s.to_write)
4530 handle_stripe_dirtying(conf, sh, &s, disks);
4531 } else { /* write back cache */
4532 int ret = 0;
4533
4534 /* First, try handle writes in caching phase */
4535 if (s.to_write)
4536 ret = r5c_try_caching_write(conf, sh, &s,
4537 disks);
4538 /*
4539 * If caching phase failed: ret == -EAGAIN
4540 * OR
4541 * stripe under reclaim: !caching && injournal
4542 *
4543 * fall back to handle_stripe_dirtying()
4544 */
4545 if (ret == -EAGAIN ||
4546 /* stripe under reclaim: !caching && injournal */
4547 (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
4548 s.injournal > 0))
4549 handle_stripe_dirtying(conf, sh, &s, disks);
4550 }
4551 }
4552
4553 /* maybe we need to check and possibly fix the parity for this stripe
4554 * Any reads will already have been scheduled, so we just see if enough
4555 * data is available. The parity check is held off while parity
4556 * dependent operations are in flight.
4557 */
4558 if (sh->check_state ||
4559 (s.syncing && s.locked == 0 &&
4560 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&

--- 153 unchanged lines hidden (view full) ---

4714 */
4715 atomic_dec(&conf->preread_active_stripes);
4716 if (atomic_read(&conf->preread_active_stripes) <
4717 IO_THRESHOLD)
4718 md_wakeup_thread(conf->mddev->thread);
4719 }
4720
4721 if (!bio_list_empty(&s.return_bi)) {
4474 /* maybe we need to check and possibly fix the parity for this stripe
4475 * Any reads will already have been scheduled, so we just see if enough
4476 * data is available. The parity check is held off while parity
4477 * dependent operations are in flight.
4478 */
4479 if (sh->check_state ||
4480 (s.syncing && s.locked == 0 &&
4481 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&

--- 153 unchanged lines hidden (view full) ---

4635 */
4636 atomic_dec(&conf->preread_active_stripes);
4637 if (atomic_read(&conf->preread_active_stripes) <
4638 IO_THRESHOLD)
4639 md_wakeup_thread(conf->mddev->thread);
4640 }
4641
4642 if (!bio_list_empty(&s.return_bi)) {
4722 if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
4643 if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
4644 (s.failed <= conf->max_degraded ||
4645 conf->mddev->external == 0)) {
4723 spin_lock_irq(&conf->device_lock);
4724 bio_list_merge(&conf->return_bi, &s.return_bi);
4725 spin_unlock_irq(&conf->device_lock);
4726 md_wakeup_thread(conf->mddev->thread);
4727 } else
4728 return_io(&s.return_bi);
4729 }
4730

--- 39 unchanged lines hidden (view full) ---

4770 struct r5conf *conf = mddev->private;
4771
4772 /* No difference between reads and writes. Just check
4773 * how busy the stripe_cache is
4774 */
4775
4776 if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
4777 return 1;
4646 spin_lock_irq(&conf->device_lock);
4647 bio_list_merge(&conf->return_bi, &s.return_bi);
4648 spin_unlock_irq(&conf->device_lock);
4649 md_wakeup_thread(conf->mddev->thread);
4650 } else
4651 return_io(&s.return_bi);
4652 }
4653

--- 39 unchanged lines hidden (view full) ---

4693 struct r5conf *conf = mddev->private;
4694
4695 /* No difference between reads and writes. Just check
4696 * how busy the stripe_cache is
4697 */
4698
4699 if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
4700 return 1;
4778
4779 /* Also checks whether there is pressure on r5cache log space */
4780 if (test_bit(R5C_LOG_TIGHT, &conf->cache_state))
4781 return 1;
4782 if (conf->quiesce)
4783 return 1;
4784 if (atomic_read(&conf->empty_inactive_list_nr))
4785 return 1;
4786
4787 return 0;
4788}
4789

--- 453 unchanged lines hidden (view full) ---

5243 int dd_idx;
5244 sector_t new_sector;
5245 sector_t logical_sector, last_sector;
5246 struct stripe_head *sh;
5247 const int rw = bio_data_dir(bi);
5248 int remaining;
5249 DEFINE_WAIT(w);
5250 bool do_prepare;
5251 bool do_flush = false;
5252
5253 if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
5254 int ret = r5l_handle_flush_request(conf->log, bi);
5255
5256 if (ret == 0)
5257 return;
5258 if (ret == -ENODEV) {
5259 md_flush_request(mddev, bi);
5260 return;
5261 }
5262 /* ret == -EAGAIN, fallback */
5263 /*
5264 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
5265 * we need to flush journal device
5266 */
5267 do_flush = bi->bi_opf & REQ_PREFLUSH;
5268 }
5269
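The three outcomes of r5l_handle_flush_request() above select three paths: 0 means the journal already completed the flush, -ENODEV means there is no journal so the request falls back to md_flush_request(), and -EAGAIN means the bio continues down the stripe path with REQ_PREFLUSH still set so that a single stripe later flushes the journal (the do_flush flag). A small sketch of that dispatch, with a stand-in handler in place of the real log code:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum flush_path { FLUSH_DONE, FLUSH_GENERIC, FLUSH_VIA_STRIPE };

/* Stand-in for r5l_handle_flush_request(); the real one talks to the journal. */
static int handle_flush_request(bool have_journal, bool journal_can_complete)
{
	if (!have_journal)
		return -ENODEV;
	return journal_can_complete ? 0 : -EAGAIN;
}

static enum flush_path route_flush(bool have_journal, bool journal_can_complete,
				   bool *do_flush)
{
	int ret = handle_flush_request(have_journal, journal_can_complete);

	*do_flush = false;
	if (ret == 0)
		return FLUSH_DONE;          /* journal flushed, bio completed   */
	if (ret == -ENODEV)
		return FLUSH_GENERIC;       /* no journal: md_flush_request()   */
	/* -EAGAIN: keep REQ_PREFLUSH and let one stripe flush the journal */
	*do_flush = true;
	return FLUSH_VIA_STRIPE;
}

int main(void)
{
	bool do_flush;

	printf("no journal     -> path %d\n", route_flush(false, false, &do_flush));
	printf("journal, defer -> path %d, do_flush=%d\n",
	       route_flush(true, false, &do_flush), do_flush);
	return 0;
}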
5270 md_write_start(mddev, bi);
5271
5272 /*
5273 * If array is degraded, better not do chunk aligned read because
5274 * later we might have to read it again in order to reconstruct
5275 * data on failed drives.
5276 */
5277 if (rw == READ && mddev->degraded == 0 &&
5278 !r5c_is_writeback(conf->log) &&
5279 mddev->reshape_position == MaxSector) {
5280 bi = chunk_aligned_read(mddev, bi);
5281 if (!bi)
5282 return;
5283 }
5284
5285 if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
5286 make_discard_request(mddev, bi);

--- 112 unchanged lines hidden (view full) ---

5399 * and wait a while
5400 */
5401 md_wakeup_thread(mddev->thread);
5402 raid5_release_stripe(sh);
5403 schedule();
5404 do_prepare = true;
5405 goto retry;
5406 }
5407 if (do_flush) {
5408 set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
5409 /* we only need flush for one stripe */
5410 do_flush = false;
5411 }
5412
5413 set_bit(STRIPE_HANDLE, &sh->state);
5414 clear_bit(STRIPE_DELAYED, &sh->state);
5415 if ((!sh->batch_head || sh == sh->batch_head) &&
5416 (bi->bi_opf & REQ_SYNC) &&
5417 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
5418 atomic_inc(&conf->preread_active_stripes);
5419 release_stripe_plug(mddev, sh);
5420 } else {

--- 905 unchanged lines hidden (view full) ---

6326
6327static struct attribute *raid5_attrs[] = {
6328 &raid5_stripecache_size.attr,
6329 &raid5_stripecache_active.attr,
6330 &raid5_preread_bypass_threshold.attr,
6331 &raid5_group_thread_cnt.attr,
6332 &raid5_skip_copy.attr,
6333 &raid5_rmw_level.attr,
6334 &r5c_journal_mode.attr,
6335 NULL,
6336};
6337static struct attribute_group raid5_attrs_group = {
6338 .name = NULL,
6339 .attrs = raid5_attrs,
6340};
6341
6342static int alloc_thread_groups(struct r5conf *conf, int cnt,

--- 129 unchanged lines hidden (view full) ---

6472}
6473
6474static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
6475{
6476 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
6477 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
6478
6479 if (alloc_scratch_buffer(conf, percpu)) {
6480 pr_warn("%s: failed memory allocation for cpu%u\n",
6481 __func__, cpu);
6385 pr_err("%s: failed memory allocation for cpu%u\n",
6386 __func__, cpu);
6482 return -ENOMEM;
6483 }
6484 return 0;
6485}
6486
6487static int raid5_alloc_percpu(struct r5conf *conf)
6488{
6489 int err = 0;

--- 53 unchanged lines hidden (view full) ---

6543 char pers_name[6];
6544 int i;
6545 int group_cnt, worker_cnt_per_group;
6546 struct r5worker_group *new_group;
6547
6548 if (mddev->new_level != 5
6549 && mddev->new_level != 4
6550 && mddev->new_level != 6) {
6551 pr_warn("md/raid:%s: raid level not set to 4/5/6 (%d)\n",
6552 mdname(mddev), mddev->new_level);
6456 printk(KERN_ERR "md/raid:%s: raid level not set to 4/5/6 (%d)\n",
6457 mdname(mddev), mddev->new_level);
6553 return ERR_PTR(-EIO);
6554 }
6555 if ((mddev->new_level == 5
6556 && !algorithm_valid_raid5(mddev->new_layout)) ||
6557 (mddev->new_level == 6
6558 && !algorithm_valid_raid6(mddev->new_layout))) {
6559 pr_warn("md/raid:%s: layout %d not supported\n",
6560 mdname(mddev), mddev->new_layout);
6464 printk(KERN_ERR "md/raid:%s: layout %d not supported\n",
6465 mdname(mddev), mddev->new_layout);
6561 return ERR_PTR(-EIO);
6562 }
6563 if (mddev->new_level == 6 && mddev->raid_disks < 4) {
6564 pr_warn("md/raid:%s: not enough configured devices (%d, minimum 4)\n",
6565 mdname(mddev), mddev->raid_disks);
6469 printk(KERN_ERR "md/raid:%s: not enough configured devices (%d, minimum 4)\n",
6470 mdname(mddev), mddev->raid_disks);
6566 return ERR_PTR(-EINVAL);
6567 }
6568
6569 if (!mddev->new_chunk_sectors ||
6570 (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
6571 !is_power_of_2(mddev->new_chunk_sectors)) {
6572 pr_warn("md/raid:%s: invalid chunk size %d\n",
6573 mdname(mddev), mddev->new_chunk_sectors << 9);
6477 printk(KERN_ERR "md/raid:%s: invalid chunk size %d\n",
6478 mdname(mddev), mddev->new_chunk_sectors << 9);
6574 return ERR_PTR(-EINVAL);
6575 }
6576
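The chunk-size test above rejects a zero chunk, a chunk whose byte size is not a whole number of pages, and a chunk that is not a power of two. The same arithmetic as a stand-alone check (PAGE_SIZE assumed to be 4096 here; in the kernel it is architecture-defined):

#include <stdbool.h>
#include <stdio.h>

#define FAKE_PAGE_SIZE 4096u   /* assumption: 4 KiB pages */

static bool chunk_sectors_valid(unsigned int chunk_sectors)
{
	unsigned long bytes = (unsigned long)chunk_sectors << 9; /* 512-byte sectors */

	if (!chunk_sectors)
		return false;
	if (bytes % FAKE_PAGE_SIZE)          /* must be a whole number of pages */
		return false;
	/* power-of-two test, the user-space equivalent of is_power_of_2() */
	return (chunk_sectors & (chunk_sectors - 1)) == 0;
}

int main(void)
{
	printf("128 sectors (64KiB): %d\n", chunk_sectors_valid(128)); /* valid    */
	printf("96 sectors  (48KiB): %d\n", chunk_sectors_valid(96));  /* not 2^n  */
	printf("4 sectors   (2KiB):  %d\n", chunk_sectors_valid(4));   /* < a page */
	return 0;
}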
6577 conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
6578 if (conf == NULL)
6579 goto abort;
6580 /* Don't enable multi-threading by default*/
6581 if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,

--- 48 unchanged lines hidden (view full) ---

6630 spin_lock_init(conf->hash_locks + i);
6631
6632 for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
6633 INIT_LIST_HEAD(conf->inactive_list + i);
6634
6635 for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
6636 INIT_LIST_HEAD(conf->temp_inactive_list + i);
6637
6638 atomic_set(&conf->r5c_cached_full_stripes, 0);
6639 INIT_LIST_HEAD(&conf->r5c_full_stripe_list);
6640 atomic_set(&conf->r5c_cached_partial_stripes, 0);
6641 INIT_LIST_HEAD(&conf->r5c_partial_stripe_list);
6642
6643 conf->level = mddev->new_level;
6644 conf->chunk_sectors = mddev->new_chunk_sectors;
6645 if (raid5_alloc_percpu(conf) != 0)
6646 goto abort;
6647
6648 pr_debug("raid456: run(%s) called.\n", mdname(mddev));
6649
6650 rdev_for_each(rdev, mddev) {

--- 10 unchanged lines hidden (view full) ---

6661 } else {
6662 if (disk->rdev)
6663 goto abort;
6664 disk->rdev = rdev;
6665 }
6666
6667 if (test_bit(In_sync, &rdev->flags)) {
6668 char b[BDEVNAME_SIZE];
6669 pr_info("md/raid:%s: device %s operational as raid disk %d\n",
6670 mdname(mddev), bdevname(rdev->bdev, b), raid_disk);
6569 printk(KERN_INFO "md/raid:%s: device %s operational as raid"
6570 " disk %d\n",
6571 mdname(mddev), bdevname(rdev->bdev, b), raid_disk);
6671 } else if (rdev->saved_raid_disk != raid_disk)
6672 /* Cannot rely on bitmap to complete recovery */
6673 conf->fullsync = 1;
6674 }
6675
6676 conf->level = mddev->new_level;
6677 if (conf->level == 6) {
6678 conf->max_degraded = 2;

--- 17 unchanged lines hidden (view full) ---

6696
6697 conf->min_nr_stripes = NR_STRIPES;
6698 if (mddev->reshape_position != MaxSector) {
6699 int stripes = max_t(int,
6700 ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
6701 ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
6702 conf->min_nr_stripes = max(NR_STRIPES, stripes);
6703 if (conf->min_nr_stripes != NR_STRIPES)
6704 pr_info("md/raid:%s: force stripe size %d for reshape\n",
6605 printk(KERN_INFO
6606 "md/raid:%s: force stripe size %d for reshape\n",
6705 mdname(mddev), conf->min_nr_stripes);
6706 }
6707 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
6708 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
6709 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
6710 if (grow_stripes(conf, conf->min_nr_stripes)) {
6711 pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
6712 mdname(mddev), memory);
6613 printk(KERN_ERR
6614 "md/raid:%s: couldn't allocate %dkB for buffers\n",
6615 mdname(mddev), memory);
6713 goto abort;
6714 } else
6715 pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
6618 printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
6619 mdname(mddev), memory);
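The "memory" value reported above is only an estimate of the stripe-cache footprint: each of min_nr_stripes stripe heads carries roughly one struct bio plus one page per device. A worked example of the same formula with the default 256 stripes and assumed sizes (the structure sizes and the 8-device width below are illustrative stand-ins, not real kernel values):

#include <stdio.h>

int main(void)
{
	/* Illustrative stand-in sizes; the real ones depend on kernel config. */
	unsigned long sizeof_stripe_head = 1024;  /* assumption              */
	unsigned long sizeof_bio         = 200;   /* assumption              */
	unsigned long page_size          = 4096;  /* assumption: 4 KiB pages */
	unsigned long min_nr_stripes     = 256;   /* NR_STRIPES default      */
	unsigned long max_disks          = 8;     /* assumed array width     */

	unsigned long memory_kib = min_nr_stripes *
		(sizeof_stripe_head + max_disks * (sizeof_bio + page_size)) / 1024;

	/* 256 * (1024 + 8 * 4296) / 1024 = about 8848 KiB for this setup */
	printf("estimated stripe cache: %lu kB\n", memory_kib);
	return 0;
}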
6716 /*
6717 * Losing a stripe head costs more than the time to refill it,
6718 * it reduces the queue depth and so can hurt throughput.
6719 * So set it rather large, scaled by number of devices.
6720 */
6721 conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
6722 conf->shrinker.scan_objects = raid5_cache_scan;
6723 conf->shrinker.count_objects = raid5_cache_count;
6724 conf->shrinker.batch = 128;
6725 conf->shrinker.flags = 0;
6726 if (register_shrinker(&conf->shrinker)) {
6727 pr_warn("md/raid:%s: couldn't register shrinker.\n",
6728 mdname(mddev));
6631 printk(KERN_ERR
6632 "md/raid:%s: couldn't register shrinker.\n",
6633 mdname(mddev));
6729 goto abort;
6730 }
6731
6732 sprintf(pers_name, "raid%d", mddev->new_level);
6733 conf->thread = md_register_thread(raid5d, mddev, pers_name);
6734 if (!conf->thread) {
6735 pr_warn("md/raid:%s: couldn't allocate thread.\n",
6736 mdname(mddev));
6640 printk(KERN_ERR
6641 "md/raid:%s: couldn't allocate thread.\n",
6642 mdname(mddev));
6737 goto abort;
6738 }
6739
6740 return conf;
6741
6742 abort:
6743 if (conf) {
6744 free_conf(conf);

--- 36 unchanged lines hidden (view full) ---

6781 struct md_rdev *rdev;
6782 struct md_rdev *journal_dev = NULL;
6783 sector_t reshape_offset = 0;
6784 int i;
6785 long long min_offset_diff = 0;
6786 int first = 1;
6787
6788 if (mddev->recovery_cp != MaxSector)
6789 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
6790 mdname(mddev));
6695 printk(KERN_NOTICE "md/raid:%s: not clean"
6696 " -- starting background reconstruction\n",
6697 mdname(mddev));
6791
6792 rdev_for_each(rdev, mddev) {
6793 long long diff;
6794
6795 if (test_bit(Journal, &rdev->flags)) {
6796 journal_dev = rdev;
6797 continue;
6798 }

--- 26 unchanged lines hidden (view full) ---

6825 */
6826 sector_t here_new, here_old;
6827 int old_disks;
6828 int max_degraded = (mddev->level == 6 ? 2 : 1);
6829 int chunk_sectors;
6830 int new_data_disks;
6831
6832 if (journal_dev) {
6833 pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
6834 mdname(mddev));
6740 printk(KERN_ERR "md/raid:%s: don't support reshape with journal - aborting.\n",
6741 mdname(mddev));
6835 return -EINVAL;
6836 }
6837
6838 if (mddev->new_level != mddev->level) {
6839 pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
6840 mdname(mddev));
6746 printk(KERN_ERR "md/raid:%s: unsupported reshape "
6747 "required - aborting.\n",
6748 mdname(mddev));
6841 return -EINVAL;
6842 }
6843 old_disks = mddev->raid_disks - mddev->delta_disks;
6844 /* reshape_position must be on a new-stripe boundary, and one
6845 * further up in new geometry must map after here in old
6846 * geometry.
6847 * If the chunk sizes are different, then as we perform reshape
6848 * in units of the largest of the two, reshape_position needs to
6849 * be a multiple of the largest chunk size times new data disks.
6850 */
6851 here_new = mddev->reshape_position;
6852 chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
6853 new_data_disks = mddev->raid_disks - max_degraded;
6854 if (sector_div(here_new, chunk_sectors * new_data_disks)) {
6855 pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
6856 mdname(mddev));
6763 printk(KERN_ERR "md/raid:%s: reshape_position not "
6764 "on a stripe boundary\n", mdname(mddev));
6857 return -EINVAL;
6858 }
6859 reshape_offset = here_new * chunk_sectors;
6860 /* here_new is the stripe we will write to */
6861 here_old = mddev->reshape_position;
6862 sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
6863 /* here_old is the first stripe that we might need to read
6864 * from */
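Concretely, the test above divides reshape_position by (chunk_sectors * new data disks) and rejects any remainder; the quotient is here_new, the first new-geometry stripe to be written, and here_old is the analogous index in the old geometry. A worked example with assumed numbers (one disk added to a RAID-5, 512-sector chunks; the values are illustrative only):

#include <stdio.h>

int main(void)
{
	/* Assumed example geometry: RAID-5, 7 disks after reshape, 6 before. */
	unsigned long long reshape_position = 6144; /* sectors, example value */
	unsigned int chunk_sectors = 512;           /* 256 KiB chunks          */
	unsigned int max_degraded  = 1;             /* RAID-5                  */
	unsigned int raid_disks    = 7;             /* new disk count          */
	unsigned int old_disks     = 6;             /* before adding one disk  */

	unsigned int new_data_disks = raid_disks - max_degraded;            /* 6 */
	unsigned long long here_new = reshape_position /
				      (chunk_sectors * new_data_disks);     /* 6144/3072 = 2 */
	unsigned long long rem = reshape_position %
				 (chunk_sectors * new_data_disks);           /* must be 0     */
	unsigned long long here_old = reshape_position /
				      (chunk_sectors * (old_disks - max_degraded)); /* 6144/2560 */

	printf("here_new=%llu remainder=%llu here_old=%llu\n", here_new, rem, here_old);
	return 0;
}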

--- 4 unchanged lines hidden (view full) ---

6869 * mdadm always starts a situation like this in
6870 * readonly mode so it can take control before
6871 * allowing any writes. So just check for that.
6872 */
6873 if (abs(min_offset_diff) >= mddev->chunk_sectors &&
6874 abs(min_offset_diff) >= mddev->new_chunk_sectors)
6875 /* not really in-place - so OK */;
6876 else if (mddev->ro == 0) {
6877 pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
6878 mdname(mddev));
6785 printk(KERN_ERR "md/raid:%s: in-place reshape "
6786 "must be started in read-only mode "
6787 "- aborting\n",
6788 mdname(mddev));
6879 return -EINVAL;
6880 }
6881 } else if (mddev->reshape_backwards
6882 ? (here_new * chunk_sectors + min_offset_diff <=
6883 here_old * chunk_sectors)
6884 : (here_new * chunk_sectors >=
6885 here_old * chunk_sectors + (-min_offset_diff))) {
6886 /* Reading from the same stripe as writing to - bad */
6887 pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
6888 mdname(mddev));
6797 printk(KERN_ERR "md/raid:%s: reshape_position too early for "
6798 "auto-recovery - aborting.\n",
6799 mdname(mddev));
6889 return -EINVAL;
6890 }
6891 pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
6802 printk(KERN_INFO "md/raid:%s: reshape will continue\n",
6803 mdname(mddev));
6892 /* OK, we should be able to continue; */
6893 } else {
6894 BUG_ON(mddev->level != mddev->new_level);
6895 BUG_ON(mddev->layout != mddev->new_layout);
6896 BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
6897 BUG_ON(mddev->delta_disks != 0);
6898 }
6899
6900 if (mddev->private == NULL)
6901 conf = setup_conf(mddev);
6902 else
6903 conf = mddev->private;
6904
6905 if (IS_ERR(conf))
6906 return PTR_ERR(conf);
6907
6908 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
6909 if (!journal_dev) {
6910 pr_warn("md/raid:%s: journal disk is missing, force array readonly\n",
6911 mdname(mddev));
6822 pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
6823 mdname(mddev));
6912 mddev->ro = 1;
6913 set_disk_ro(mddev->gendisk, 1);
6914 } else if (mddev->recovery_cp == MaxSector)
6915 set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
6916 }
6917
6918 conf->min_offset_diff = min_offset_diff;
6919 mddev->thread = conf->thread;

--- 10 unchanged lines hidden (view full) ---

6930 clear_bit(Replacement, &rdev->flags);
6931 conf->disks[i].rdev = rdev;
6932 }
6933 if (!rdev)
6934 continue;
6935 if (conf->disks[i].replacement &&
6936 conf->reshape_progress != MaxSector) {
6937 /* replacements and reshape simply do not mix. */
6938 pr_warn("md: cannot handle concurrent replacement and reshape.\n");
6850 printk(KERN_ERR "md: cannot handle concurrent "
6851 "replacement and reshape.\n");
6939 goto abort;
6940 }
6941 if (test_bit(In_sync, &rdev->flags)) {
6942 working_disks++;
6943 continue;
6944 }
6945 /* This disc is not fully in-sync. However if it
6946 * just stored parity (beyond the recovery_offset),

--- 25 unchanged lines hidden (view full) ---

6972 }
6973
6974 /*
6975 * 0 for a fully functional array, 1 or 2 for a degraded array.
6976 */
6977 mddev->degraded = calc_degraded(conf);
6978
6979 if (has_failed(conf)) {
6980 pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n",
6893 printk(KERN_ERR "md/raid:%s: not enough operational devices"
6894 " (%d/%d failed)\n",
6981 mdname(mddev), mddev->degraded, conf->raid_disks);
6982 goto abort;
6983 }
6984
6985 /* device size must be a multiple of chunk size */
6986 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
6987 mddev->resync_max_sectors = mddev->dev_sectors;
6988
6989 if (mddev->degraded > dirty_parity_disks &&
6990 mddev->recovery_cp != MaxSector) {
6991 if (mddev->ok_start_degraded)
6992 pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n",
6993 mdname(mddev));
6906 printk(KERN_WARNING
6907 "md/raid:%s: starting dirty degraded array"
6908 " - data corruption possible.\n",
6909 mdname(mddev));
6994 else {
6995 pr_crit("md/raid:%s: cannot start dirty degraded array.\n",
6996 mdname(mddev));
6911 printk(KERN_ERR
6912 "md/raid:%s: cannot start dirty degraded array.\n",
6913 mdname(mddev));
6997 goto abort;
6998 }
6999 }
7000
7001 pr_info("md/raid:%s: raid level %d active with %d out of %d devices, algorithm %d\n",
7002 mdname(mddev), conf->level,
7003 mddev->raid_disks-mddev->degraded, mddev->raid_disks,
7004 mddev->new_layout);
6918 if (mddev->degraded == 0)
6919 printk(KERN_INFO "md/raid:%s: raid level %d active with %d out of %d"
6920 " devices, algorithm %d\n", mdname(mddev), conf->level,
6921 mddev->raid_disks-mddev->degraded, mddev->raid_disks,
6922 mddev->new_layout);
6923 else
6924 printk(KERN_ALERT "md/raid:%s: raid level %d active with %d"
6925 " out of %d devices, algorithm %d\n",
6926 mdname(mddev), conf->level,
6927 mddev->raid_disks - mddev->degraded,
6928 mddev->raid_disks, mddev->new_layout);
7005
7006 print_raid5_conf(conf);
7007
7008 if (conf->reshape_progress != MaxSector) {
7009 conf->reshape_safe = conf->reshape_progress;
7010 atomic_set(&conf->reshape_stripes, 0);
7011 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7012 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7013 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7014 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7015 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
7016 "reshape");
7017 }
7018
7019 /* Ok, everything is just fine now */
7020 if (mddev->to_remove == &raid5_attrs_group)
7021 mddev->to_remove = NULL;
7022 else if (mddev->kobj.sd &&
7023 sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
7024 pr_warn("raid5: failed to create sysfs attributes for %s\n",
7025 mdname(mddev));
6948 printk(KERN_WARNING
6949 "raid5: failed to create sysfs attributes for %s\n",
6950 mdname(mddev));
7026 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
7027
7028 if (mddev->queue) {
7029 int chunk_size;
7030 bool discard_supported = true;
7031 /* read-ahead size must cover two whole stripes, which
7032 * is 2 * (datadisks) * chunksize, where 'datadisks' is the
7033 * number of data devices in the array

--- 71 unchanged lines hidden (view full) ---

7105 mddev->queue);
7106
7107 blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
7108 }
7109
7110 if (journal_dev) {
7111 char b[BDEVNAME_SIZE];
7112
7113 pr_debug("md/raid:%s: using device %s as journal\n",
7114 mdname(mddev), bdevname(journal_dev->bdev, b));
7115 if (r5l_init_log(conf, journal_dev))
7116 goto abort;
7038 printk(KERN_INFO"md/raid:%s: using device %s as journal\n",
7039 mdname(mddev), bdevname(journal_dev->bdev, b));
7040 r5l_init_log(conf, journal_dev);
7117 }
7118
7119 return 0;
7120abort:
7121 md_unregister_thread(&mddev->thread);
7122 print_raid5_conf(conf);
7123 free_conf(conf);
7124 mddev->private = NULL;
7125 pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
7049 printk(KERN_ALERT "md/raid:%s: failed to run raid set.\n", mdname(mddev));
7126 return -EIO;
7127}
7128
7129static void raid5_free(struct mddev *mddev, void *priv)
7130{
7131 struct r5conf *conf = priv;
7132
7133 free_conf(conf);

--- 17 unchanged lines hidden (view full) ---

7151 seq_printf (seq, "]");
7152}
7153
7154static void print_raid5_conf (struct r5conf *conf)
7155{
7156 int i;
7157 struct disk_info *tmp;
7158
7159 pr_debug("RAID conf printout:\n");
7083 printk(KERN_DEBUG "RAID conf printout:\n");
7160 if (!conf) {
7161 pr_debug("(conf==NULL)\n");
7085 printk("(conf==NULL)\n");
7162 return;
7163 }
7164 pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level,
7088 printk(KERN_DEBUG " --- level:%d rd:%d wd:%d\n", conf->level,
7165 conf->raid_disks,
7166 conf->raid_disks - conf->mddev->degraded);
7167
7168 for (i = 0; i < conf->raid_disks; i++) {
7169 char b[BDEVNAME_SIZE];
7170 tmp = conf->disks + i;
7171 if (tmp->rdev)
7172 pr_debug(" disk %d, o:%d, dev:%s\n",
7096 printk(KERN_DEBUG " disk %d, o:%d, dev:%s\n",
7173 i, !test_bit(Faulty, &tmp->rdev->flags),
7174 bdevname(tmp->rdev->bdev, b));
7175 }
7176}
7177
7178static int raid5_spare_active(struct mddev *mddev)
7179{
7180 int i;

--- 131 unchanged lines hidden (view full) ---

7312 return -EBUSY;
7313
7314 rdev->raid_disk = 0;
7315 /*
7316 * The array is in readonly mode if journal is missing, so no
7317 * write requests running. We should be safe
7318 */
7319 r5l_init_log(conf, rdev);
7320 pr_debug("md/raid:%s: using device %s as journal\n",
7321 mdname(mddev), bdevname(rdev->bdev, b));
7244 printk(KERN_INFO"md/raid:%s: using device %s as journal\n",
7245 mdname(mddev), bdevname(rdev->bdev, b));
7322 return 0;
7323 }
7324 if (mddev->recovery_disabled == conf->recovery_disabled)
7325 return -EBUSY;
7326
7327 if (rdev->saved_raid_disk < 0 && has_failed(conf))
7328 /* no point adding a device */
7329 return -EINVAL;

--- 87 unchanged lines hidden (view full) ---

7417 * If the chunk size is greater, user-space should request more
7418 * stripe_heads first.
7419 */
7420 struct r5conf *conf = mddev->private;
7421 if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
7422 > conf->min_nr_stripes ||
7423 ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
7424 > conf->min_nr_stripes) {
7425 pr_warn("md/raid:%s: reshape: not enough stripes. Needed %lu\n",
7426 mdname(mddev),
7427 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
7428 / STRIPE_SIZE)*4);
7349 printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes. Needed %lu\n",
7350 mdname(mddev),
7351 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
7352 / STRIPE_SIZE)*4);
7429 return 0;
7430 }
7431 return 1;
7432}
7433
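The test above demands at least four stripe_heads per chunk of the larger of the old and new chunk sizes, i.e. needed = (chunk_bytes / STRIPE_SIZE) * 4. With page-sized stripes (assumed 4 KiB here) and an assumed 512-sector chunk, the arithmetic works out as follows:

#include <stdio.h>

int main(void)
{
	unsigned long stripe_size = 4096;  /* STRIPE_SIZE == PAGE_SIZE, assumed 4 KiB */
	unsigned int chunk_sectors = 512;  /* assumed current chunk: 256 KiB          */
	unsigned int new_chunk_sectors = 512;
	unsigned int min_nr_stripes = 256; /* NR_STRIPES default                      */

	unsigned int larger = chunk_sectors > new_chunk_sectors ?
			      chunk_sectors : new_chunk_sectors;
	unsigned long needed = ((unsigned long)larger << 9) / stripe_size * 4;

	/* 256 KiB / 4 KiB = 64 stripes per chunk, times 4 = 256 needed */
	printf("needed=%lu, cache ok=%d\n", needed, needed <= min_nr_stripes);
	return 0;
}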
7434static int check_reshape(struct mddev *mddev)
7435{
7436 struct r5conf *conf = mddev->private;

--- 64 unchanged lines hidden (view full) ---

7501 return -EINVAL;
7502
7503 /* Refuse to reduce size of the array. Any reductions in
7504 * array size must be through explicit setting of array_size
7505 * attribute.
7506 */
7507 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
7508 < mddev->array_sectors) {
7509 pr_warn("md/raid:%s: array size must be reduced before number of disks\n",
7510 mdname(mddev));
7433 printk(KERN_ERR "md/raid:%s: array size must be reduced "
7434 "before number of disks\n", mdname(mddev));
7511 return -EINVAL;
7512 }
7513
7514 atomic_set(&conf->reshape_stripes, 0);
7515 spin_lock_irq(&conf->device_lock);
7516 write_seqcount_begin(&conf->gen_lock);
7517 conf->previous_raid_disks = conf->raid_disks;
7518 conf->raid_disks += mddev->delta_disks;

--- 171 unchanged lines hidden (view full) ---

7690 wake_up(&conf->wait_for_overlap);
7691 break;
7692
7693 case 1: /* stop all writes */
7694 lock_all_device_hash_locks_irq(conf);
7695 /* '2' tells resync/reshape to pause so that all
7696 * active stripes can drain
7697 */
7698 r5c_flush_cache(conf, INT_MAX);
7699 conf->quiesce = 2;
7700 wait_event_cmd(conf->wait_for_quiescent,
7701 atomic_read(&conf->active_stripes) == 0 &&
7702 atomic_read(&conf->active_aligned_reads) == 0,
7703 unlock_all_device_hash_locks_irq(conf),
7704 lock_all_device_hash_locks_irq(conf));
7705 conf->quiesce = 1;
7706 unlock_all_device_hash_locks_irq(conf);

--- 14 unchanged lines hidden (view full) ---

7721
7722static void *raid45_takeover_raid0(struct mddev *mddev, int level)
7723{
7724 struct r0conf *raid0_conf = mddev->private;
7725 sector_t sectors;
7726
7727 /* for raid0 takeover only one zone is supported */
7728 if (raid0_conf->nr_strip_zones > 1) {
7729 pr_warn("md/raid:%s: cannot takeover raid0 with more than one zone.\n",
7730 mdname(mddev));
7652 printk(KERN_ERR "md/raid:%s: cannot takeover raid0 with more than one zone.\n",
7653 mdname(mddev));
7731 return ERR_PTR(-EINVAL);
7732 }
7733
7734 sectors = raid0_conf->strip_zone[0].zone_end;
7735 sector_div(sectors, raid0_conf->strip_zone[0].nb_dev);
7736 mddev->dev_sectors = sectors;
7737 mddev->new_level = level;
7738 mddev->new_layout = ALGORITHM_PARITY_N;
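For the single-zone takeover above, the per-device size is simply the zone end divided by the number of devices in the zone, which is what the sector_div() call computes. A worked example with assumed numbers (a 1 GiB zone striped over 4 devices):

#include <stdio.h>

int main(void)
{
	/* Assumed single-zone raid0 geometry. */
	unsigned long long zone_end = 2097152; /* sectors across the whole zone (1 GiB) */
	unsigned int nb_dev = 4;               /* devices in the zone                   */

	unsigned long long dev_sectors = zone_end / nb_dev; /* what sector_div() yields */

	/* 2097152 / 4 = 524288 sectors, i.e. 256 MiB per member device */
	printf("dev_sectors=%llu\n", dev_sectors);
	return 0;
}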

--- 336 unchanged lines hidden ---