Lines Matching +full:conf +full:- +full:tx

1 // SPDX-License-Identifier: GPL-2.0-or-later
8 * RAID-4/5/6 management functions.
9 * Thanks to Penguin Computing for making the RAID-6 development possible
22 * conf->seq_write is the number of the last batch successfully written.
23 * conf->seq_flush is the number of the last batch that was closed to
26 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
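As a reading aid for the batching comments above (this note is not part of the matched source): a stripe added to a bitmap batch records that batch number in sh->bm_seq, and do_release_stripe() later keeps the stripe on conf->bitmap_list while sh->bm_seq - conf->seq_write > 0, i.e. until the batch it was recorded in has been successfully written out.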
56 #include "md-bitmap.h"
57 #include "raid5-log.h"
72 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) in stripe_hash() argument
74 int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK; in stripe_hash()
75 return &conf->stripe_hashtbl[hash]; in stripe_hash()
78 static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect) in stripe_hash_locks_hash() argument
80 return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK; in stripe_hash_locks_hash()
83 static inline void lock_device_hash_lock(struct r5conf *conf, int hash) in lock_device_hash_lock() argument
84 __acquires(&conf->device_lock) in lock_device_hash_lock()
86 spin_lock_irq(conf->hash_locks + hash); in lock_device_hash_lock()
87 spin_lock(&conf->device_lock); in lock_device_hash_lock()
90 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) in unlock_device_hash_lock() argument
91 __releases(&conf->device_lock) in unlock_device_hash_lock()
93 spin_unlock(&conf->device_lock); in unlock_device_hash_lock()
94 spin_unlock_irq(conf->hash_locks + hash); in unlock_device_hash_lock()
97 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) in lock_all_device_hash_locks_irq() argument
98 __acquires(&conf->device_lock) in lock_all_device_hash_locks_irq()
101 spin_lock_irq(conf->hash_locks); in lock_all_device_hash_locks_irq()
103 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); in lock_all_device_hash_locks_irq()
104 spin_lock(&conf->device_lock); in lock_all_device_hash_locks_irq()
107 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) in unlock_all_device_hash_locks_irq() argument
108 __releases(&conf->device_lock) in unlock_all_device_hash_locks_irq()
111 spin_unlock(&conf->device_lock); in unlock_all_device_hash_locks_irq()
112 for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) in unlock_all_device_hash_locks_irq()
113 spin_unlock(conf->hash_locks + i); in unlock_all_device_hash_locks_irq()
114 spin_unlock_irq(conf->hash_locks); in unlock_all_device_hash_locks_irq()
120 if (sh->ddf_layout) in raid6_d0()
124 if (sh->qd_idx == sh->disks - 1) in raid6_d0()
127 return sh->qd_idx + 1; in raid6_d0()
137 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
138 * is raid_disks-1. This helper does that mapping.
145 if (sh->ddf_layout) in raid6_idx_to_slot()
147 if (idx == sh->pd_idx) in raid6_idx_to_slot()
149 if (idx == sh->qd_idx) in raid6_idx_to_slot()
151 if (!sh->ddf_layout) in raid6_idx_to_slot()
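A rough standalone sketch of the non-DDF mapping described by the comment above; the helper name and flat parameter list are illustrative only, not taken from raid5.c:

static int example_idx_to_slot(int idx, int pd_idx, int qd_idx,
			       int *count, int syndrome_disks)
{
	/* P parity always occupies the second-to-last slot */
	if (idx == pd_idx)
		return syndrome_disks;
	/* Q syndrome always occupies the last slot */
	if (idx == qd_idx)
		return syndrome_disks + 1;
	/* data disks take slots 0 .. syndrome_disks-1 in walk order */
	return (*count)++;
}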
156 static void print_raid5_conf (struct r5conf *conf);
160 return sh->check_state || sh->reconstruct_state || in stripe_operations_active()
161 test_bit(STRIPE_BIOFILL_RUN, &sh->state) || in stripe_operations_active()
162 test_bit(STRIPE_COMPUTE_RUN, &sh->state); in stripe_operations_active()
167 return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) || in stripe_is_lowprio()
168 test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) && in stripe_is_lowprio()
169 !test_bit(STRIPE_R5C_CACHING, &sh->state); in stripe_is_lowprio()
173 __must_hold(&sh->raid_conf->device_lock) in raid5_wakeup_stripe_thread()
175 struct r5conf *conf = sh->raid_conf; in raid5_wakeup_stripe_thread() local
178 int i, cpu = sh->cpu; in raid5_wakeup_stripe_thread()
182 sh->cpu = cpu; in raid5_wakeup_stripe_thread()
185 if (list_empty(&sh->lru)) { in raid5_wakeup_stripe_thread()
187 group = conf->worker_groups + cpu_to_group(cpu); in raid5_wakeup_stripe_thread()
189 list_add_tail(&sh->lru, &group->loprio_list); in raid5_wakeup_stripe_thread()
191 list_add_tail(&sh->lru, &group->handle_list); in raid5_wakeup_stripe_thread()
192 group->stripes_cnt++; in raid5_wakeup_stripe_thread()
193 sh->group = group; in raid5_wakeup_stripe_thread()
196 if (conf->worker_cnt_per_group == 0) { in raid5_wakeup_stripe_thread()
197 md_wakeup_thread(conf->mddev->thread); in raid5_wakeup_stripe_thread()
201 group = conf->worker_groups + cpu_to_group(sh->cpu); in raid5_wakeup_stripe_thread()
203 group->workers[0].working = true; in raid5_wakeup_stripe_thread()
205 queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work); in raid5_wakeup_stripe_thread()
207 thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1; in raid5_wakeup_stripe_thread()
209 for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) { in raid5_wakeup_stripe_thread()
210 if (group->workers[i].working == false) { in raid5_wakeup_stripe_thread()
211 group->workers[i].working = true; in raid5_wakeup_stripe_thread()
212 queue_work_on(sh->cpu, raid5_wq, in raid5_wakeup_stripe_thread()
213 &group->workers[i].work); in raid5_wakeup_stripe_thread()
214 thread_cnt--; in raid5_wakeup_stripe_thread()
219 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, in do_release_stripe() argument
221 __must_hold(&conf->device_lock) in do_release_stripe()
226 BUG_ON(!list_empty(&sh->lru)); in do_release_stripe()
227 BUG_ON(atomic_read(&conf->active_stripes)==0); in do_release_stripe()
229 if (r5c_is_writeback(conf->log)) in do_release_stripe()
230 for (i = sh->disks; i--; ) in do_release_stripe()
231 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in do_release_stripe()
240 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || in do_release_stripe()
241 (conf->quiesce && r5c_is_writeback(conf->log) && in do_release_stripe()
242 !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { in do_release_stripe()
243 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in do_release_stripe()
245 set_bit(STRIPE_HANDLE, &sh->state); in do_release_stripe()
248 if (test_bit(STRIPE_HANDLE, &sh->state)) { in do_release_stripe()
249 if (test_bit(STRIPE_DELAYED, &sh->state) && in do_release_stripe()
250 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
251 list_add_tail(&sh->lru, &conf->delayed_list); in do_release_stripe()
252 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && in do_release_stripe()
253 sh->bm_seq - conf->seq_write > 0) in do_release_stripe()
254 list_add_tail(&sh->lru, &conf->bitmap_list); in do_release_stripe()
256 clear_bit(STRIPE_DELAYED, &sh->state); in do_release_stripe()
257 clear_bit(STRIPE_BIT_DELAY, &sh->state); in do_release_stripe()
258 if (conf->worker_cnt_per_group == 0) { in do_release_stripe()
260 list_add_tail(&sh->lru, in do_release_stripe()
261 &conf->loprio_list); in do_release_stripe()
263 list_add_tail(&sh->lru, in do_release_stripe()
264 &conf->handle_list); in do_release_stripe()
270 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
273 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
274 if (atomic_dec_return(&conf->preread_active_stripes) in do_release_stripe()
276 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
277 atomic_dec(&conf->active_stripes); in do_release_stripe()
278 if (!test_bit(STRIPE_EXPANDING, &sh->state)) { in do_release_stripe()
279 if (!r5c_is_writeback(conf->log)) in do_release_stripe()
280 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
282 WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)); in do_release_stripe()
284 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
285 else if (injournal == conf->raid_disks - conf->max_degraded) { in do_release_stripe()
287 if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) in do_release_stripe()
288 atomic_inc(&conf->r5c_cached_full_stripes); in do_release_stripe()
289 if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) in do_release_stripe()
290 atomic_dec(&conf->r5c_cached_partial_stripes); in do_release_stripe()
291 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); in do_release_stripe()
292 r5c_check_cached_full_stripe(conf); in do_release_stripe()
299 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list); in do_release_stripe()
305 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh, in __release_stripe() argument
307 __must_hold(&conf->device_lock) in __release_stripe()
309 if (atomic_dec_and_test(&sh->count)) in __release_stripe()
310 do_release_stripe(conf, sh, temp_inactive_list); in __release_stripe()
320 static void release_inactive_stripe_list(struct r5conf *conf, in release_inactive_stripe_list() argument
330 hash = NR_STRIPE_HASH_LOCKS - 1; in release_inactive_stripe_list()
334 struct list_head *list = &temp_inactive_list[size - 1]; in release_inactive_stripe_list()
341 spin_lock_irqsave(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
342 if (list_empty(conf->inactive_list + hash) && in release_inactive_stripe_list()
344 atomic_dec(&conf->empty_inactive_list_nr); in release_inactive_stripe_list()
345 list_splice_tail_init(list, conf->inactive_list + hash); in release_inactive_stripe_list()
347 spin_unlock_irqrestore(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
349 size--; in release_inactive_stripe_list()
350 hash--; in release_inactive_stripe_list()
354 wake_up(&conf->wait_for_stripe); in release_inactive_stripe_list()
355 if (atomic_read(&conf->active_stripes) == 0) in release_inactive_stripe_list()
356 wake_up(&conf->wait_for_quiescent); in release_inactive_stripe_list()
357 if (conf->retry_read_aligned) in release_inactive_stripe_list()
358 md_wakeup_thread(conf->mddev->thread); in release_inactive_stripe_list()
362 static int release_stripe_list(struct r5conf *conf, in release_stripe_list() argument
364 __must_hold(&conf->device_lock) in release_stripe_list()
370 head = llist_del_all(&conf->released_stripes); in release_stripe_list()
377 clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state); in release_stripe_list()
383 hash = sh->hash_lock_index; in release_stripe_list()
384 __release_stripe(conf, sh, &temp_inactive_list[hash]); in release_stripe_list()
393 struct r5conf *conf = sh->raid_conf; in raid5_release_stripe() local
401 if (atomic_add_unless(&sh->count, -1, 1)) in raid5_release_stripe()
404 if (unlikely(!conf->mddev->thread) || in raid5_release_stripe()
405 test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) in raid5_release_stripe()
407 wakeup = llist_add(&sh->release_list, &conf->released_stripes); in raid5_release_stripe()
409 md_wakeup_thread(conf->mddev->thread); in raid5_release_stripe()
413 if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { in raid5_release_stripe()
415 hash = sh->hash_lock_index; in raid5_release_stripe()
416 do_release_stripe(conf, sh, &list); in raid5_release_stripe()
417 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_release_stripe()
418 release_inactive_stripe_list(conf, &list, hash); in raid5_release_stripe()
425 (unsigned long long)sh->sector); in remove_hash()
427 hlist_del_init(&sh->hash); in remove_hash()
430 static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) in insert_hash() argument
432 struct hlist_head *hp = stripe_hash(conf, sh->sector); in insert_hash()
435 (unsigned long long)sh->sector); in insert_hash()
437 hlist_add_head(&sh->hash, hp); in insert_hash()
441 static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) in get_free_stripe() argument
446 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
448 first = (conf->inactive_list + hash)->next; in get_free_stripe()
452 atomic_inc(&conf->active_stripes); in get_free_stripe()
453 BUG_ON(hash != sh->hash_lock_index); in get_free_stripe()
454 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
455 atomic_inc(&conf->empty_inactive_list_nr); in get_free_stripe()
467 if (!sh->pages) in free_stripe_pages()
470 for (i = 0; i < sh->nr_pages; i++) { in free_stripe_pages()
471 p = sh->pages[i]; in free_stripe_pages()
474 sh->pages[i] = NULL; in free_stripe_pages()
483 for (i = 0; i < sh->nr_pages; i++) { in alloc_stripe_pages()
485 if (sh->pages[i]) in alloc_stripe_pages()
491 return -ENOMEM; in alloc_stripe_pages()
493 sh->pages[i] = p; in alloc_stripe_pages()
499 init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) in init_stripe_shared_pages() argument
503 if (sh->pages) in init_stripe_shared_pages()
506 /* Each of the sh->dev[i] needs one conf->stripe_size */ in init_stripe_shared_pages()
507 cnt = PAGE_SIZE / conf->stripe_size; in init_stripe_shared_pages()
508 nr_pages = (disks + cnt - 1) / cnt; in init_stripe_shared_pages()
510 sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); in init_stripe_shared_pages()
511 if (!sh->pages) in init_stripe_shared_pages()
512 return -ENOMEM; in init_stripe_shared_pages()
513 sh->nr_pages = nr_pages; in init_stripe_shared_pages()
514 sh->stripes_per_page = cnt; in init_stripe_shared_pages()
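To make the arithmetic above concrete (values assumed purely for illustration): on a system with 64 KiB pages and conf->stripe_size set to 4 KiB, cnt = 65536 / 4096 = 16 stripes per page, so an array with disks = 20 needs nr_pages = (20 + 16 - 1) / 16 = 2 shared pages; when stripe_size equals PAGE_SIZE, cnt = 1 and every device gets its own page.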
522 int num = sh->raid_conf->pool_size; in shrink_buffers()
528 WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); in shrink_buffers()
529 p = sh->dev[i].page; in shrink_buffers()
532 sh->dev[i].page = NULL; in shrink_buffers()
537 sh->dev[i].page = NULL; in shrink_buffers()
545 int num = sh->raid_conf->pool_size; in grow_buffers()
554 sh->dev[i].page = page; in grow_buffers()
555 sh->dev[i].orig_page = page; in grow_buffers()
556 sh->dev[i].offset = 0; in grow_buffers()
560 return -ENOMEM; in grow_buffers()
563 sh->dev[i].page = raid5_get_dev_page(sh, i); in grow_buffers()
564 sh->dev[i].orig_page = sh->dev[i].page; in grow_buffers()
565 sh->dev[i].offset = raid5_get_page_offset(sh, i); in grow_buffers()
571 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
576 struct r5conf *conf = sh->raid_conf; in init_stripe() local
579 BUG_ON(atomic_read(&sh->count) != 0); in init_stripe()
580 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); in init_stripe()
582 BUG_ON(sh->batch_head); in init_stripe()
587 seq = read_seqcount_begin(&conf->gen_lock); in init_stripe()
588 sh->generation = conf->generation - previous; in init_stripe()
589 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; in init_stripe()
590 sh->sector = sector; in init_stripe()
591 stripe_set_idx(sector, conf, previous, sh); in init_stripe()
592 sh->state = 0; in init_stripe()
594 for (i = sh->disks; i--; ) { in init_stripe()
595 struct r5dev *dev = &sh->dev[i]; in init_stripe()
597 if (dev->toread || dev->read || dev->towrite || dev->written || in init_stripe()
598 test_bit(R5_LOCKED, &dev->flags)) { in init_stripe()
600 (unsigned long long)sh->sector, i, dev->toread, in init_stripe()
601 dev->read, dev->towrite, dev->written, in init_stripe()
602 test_bit(R5_LOCKED, &dev->flags)); in init_stripe()
605 dev->flags = 0; in init_stripe()
606 dev->sector = raid5_compute_blocknr(sh, i, previous); in init_stripe()
608 if (read_seqcount_retry(&conf->gen_lock, seq)) in init_stripe()
610 sh->overwrite_disks = 0; in init_stripe()
611 insert_hash(conf, sh); in init_stripe()
612 sh->cpu = smp_processor_id(); in init_stripe()
613 set_bit(STRIPE_BATCH_READY, &sh->state); in init_stripe()
616 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, in __find_stripe() argument
622 hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) in __find_stripe()
623 if (sh->sector == sector && sh->generation == generation) in __find_stripe()
629 static struct stripe_head *find_get_stripe(struct r5conf *conf, in find_get_stripe() argument
635 sh = __find_stripe(conf, sector, generation); in find_get_stripe()
639 if (atomic_inc_not_zero(&sh->count)) in find_get_stripe()
644 * be on a list (sh->lru). Must remove the stripe from the list that in find_get_stripe()
648 spin_lock(&conf->device_lock); in find_get_stripe()
649 if (!atomic_read(&sh->count)) { in find_get_stripe()
650 if (!test_bit(STRIPE_HANDLE, &sh->state)) in find_get_stripe()
651 atomic_inc(&conf->active_stripes); in find_get_stripe()
652 BUG_ON(list_empty(&sh->lru) && in find_get_stripe()
653 !test_bit(STRIPE_EXPANDING, &sh->state)); in find_get_stripe()
655 if (!list_empty(conf->inactive_list + hash)) in find_get_stripe()
657 list_del_init(&sh->lru); in find_get_stripe()
658 if (list_empty(conf->inactive_list + hash) && in find_get_stripe()
660 atomic_inc(&conf->empty_inactive_list_nr); in find_get_stripe()
661 if (sh->group) { in find_get_stripe()
662 sh->group->stripes_cnt--; in find_get_stripe()
663 sh->group = NULL; in find_get_stripe()
666 atomic_inc(&sh->count); in find_get_stripe()
667 spin_unlock(&conf->device_lock); in find_get_stripe()
674 * - start an array
675 * - remove non-faulty devices
676 * - add a spare
677 * - allow a reshape
682 * of the two sections, and some non-in_sync devices may
685 * Most calls to this function hold &conf->device_lock. Calls
689 int raid5_calc_degraded(struct r5conf *conf) in raid5_calc_degraded() argument
696 for (i = 0; i < conf->previous_raid_disks; i++) { in raid5_calc_degraded()
697 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
698 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
699 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
700 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
702 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
705 /* not in-sync or faulty. in raid5_calc_degraded()
712 * be in-sync. in raid5_calc_degraded()
714 if (conf->raid_disks >= conf->previous_raid_disks) in raid5_calc_degraded()
718 if (conf->raid_disks == conf->previous_raid_disks) in raid5_calc_degraded()
722 for (i = 0; i < conf->raid_disks; i++) { in raid5_calc_degraded()
723 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
724 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
725 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
726 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
728 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
731 /* not in-sync or faulty. in raid5_calc_degraded()
736 if (conf->raid_disks <= conf->previous_raid_disks) in raid5_calc_degraded()
745 static bool has_failed(struct r5conf *conf) in has_failed() argument
747 int degraded = conf->mddev->degraded; in has_failed()
749 if (test_bit(MD_BROKEN, &conf->mddev->flags)) in has_failed()
752 if (conf->mddev->reshape_position != MaxSector) in has_failed()
753 degraded = raid5_calc_degraded(conf); in has_failed()
755 return degraded > conf->max_degraded; in has_failed()
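As a worked example of the check above (array geometry assumed for illustration): a RAID-6 set has conf->max_degraded == 2, so raid5_calc_degraded() returning 1 or 2 leaves the array degraded but usable, while a third missing or Faulty member pushes the count past max_degraded and has_failed() reports the array as failed; for RAID-4/5, max_degraded is 1, so a second failure already has that effect.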
790 static bool is_inactive_blocked(struct r5conf *conf, int hash) in is_inactive_blocked() argument
792 if (list_empty(conf->inactive_list + hash)) in is_inactive_blocked()
795 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) in is_inactive_blocked()
798 return (atomic_read(&conf->active_stripes) < in is_inactive_blocked()
799 (conf->max_nr_stripes * 3 / 4)); in is_inactive_blocked()
802 struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, in raid5_get_active_stripe() argument
807 int hash = stripe_hash_locks_hash(conf, sector); in raid5_get_active_stripe()
812 spin_lock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
815 if (!(flags & R5_GAS_NOQUIESCE) && conf->quiesce) { in raid5_get_active_stripe()
823 if (ctx && ctx->batch_last) { in raid5_get_active_stripe()
824 raid5_release_stripe(ctx->batch_last); in raid5_get_active_stripe()
825 ctx->batch_last = NULL; in raid5_get_active_stripe()
828 wait_event_lock_irq(conf->wait_for_quiescent, in raid5_get_active_stripe()
829 !conf->quiesce, in raid5_get_active_stripe()
830 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
833 sh = find_get_stripe(conf, sector, conf->generation - previous, in raid5_get_active_stripe()
838 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { in raid5_get_active_stripe()
839 sh = get_free_stripe(conf, hash); in raid5_get_active_stripe()
841 r5c_check_stripe_cache_usage(conf); in raid5_get_active_stripe()
843 atomic_inc(&sh->count); in raid5_get_active_stripe()
847 if (!test_bit(R5_DID_ALLOC, &conf->cache_state)) in raid5_get_active_stripe()
848 set_bit(R5_ALLOC_MORE, &conf->cache_state); in raid5_get_active_stripe()
854 set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); in raid5_get_active_stripe()
855 r5l_wake_reclaim(conf->log, 0); in raid5_get_active_stripe()
858 if (ctx && ctx->batch_last) { in raid5_get_active_stripe()
859 raid5_release_stripe(ctx->batch_last); in raid5_get_active_stripe()
860 ctx->batch_last = NULL; in raid5_get_active_stripe()
863 wait_event_lock_irq(conf->wait_for_stripe, in raid5_get_active_stripe()
864 is_inactive_blocked(conf, hash), in raid5_get_active_stripe()
865 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
866 clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); in raid5_get_active_stripe()
869 spin_unlock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
875 BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded)); in is_full_stripe_write()
876 return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded); in is_full_stripe_write()
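For example (geometry assumed for illustration): in an 8-device RAID-6 stripe, sh->disks is 8 and max_degraded is 2, so is_full_stripe_write() becomes true once all 6 data blocks are covered by pending writes, which lets parity be computed from the new data alone rather than via a read-modify-write.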
880 __acquires(&sh1->stripe_lock) in lock_two_stripes()
881 __acquires(&sh2->stripe_lock) in lock_two_stripes()
884 spin_lock_irq(&sh2->stripe_lock); in lock_two_stripes()
885 spin_lock_nested(&sh1->stripe_lock, 1); in lock_two_stripes()
887 spin_lock_irq(&sh1->stripe_lock); in lock_two_stripes()
888 spin_lock_nested(&sh2->stripe_lock, 1); in lock_two_stripes()
893 __releases(&sh1->stripe_lock) in unlock_two_stripes()
894 __releases(&sh2->stripe_lock) in unlock_two_stripes()
896 spin_unlock(&sh1->stripe_lock); in unlock_two_stripes()
897 spin_unlock_irq(&sh2->stripe_lock); in unlock_two_stripes()
903 struct r5conf *conf = sh->raid_conf; in stripe_can_batch() local
905 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in stripe_can_batch()
907 return test_bit(STRIPE_BATCH_READY, &sh->state) && in stripe_can_batch()
908 !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && in stripe_can_batch()
913 static void stripe_add_to_batch_list(struct r5conf *conf, in stripe_add_to_batch_list() argument
922 tmp_sec = sh->sector; in stripe_add_to_batch_list()
923 if (!sector_div(tmp_sec, conf->chunk_sectors)) in stripe_add_to_batch_list()
925 head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf); in stripe_add_to_batch_list()
927 if (last_sh && head_sector == last_sh->sector) { in stripe_add_to_batch_list()
929 atomic_inc(&head->count); in stripe_add_to_batch_list()
931 hash = stripe_hash_locks_hash(conf, head_sector); in stripe_add_to_batch_list()
932 spin_lock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
933 head = find_get_stripe(conf, head_sector, conf->generation, in stripe_add_to_batch_list()
935 spin_unlock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
947 if (sh->batch_head) in stripe_add_to_batch_list()
951 while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in stripe_add_to_batch_list()
953 if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf || in stripe_add_to_batch_list()
954 bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) in stripe_add_to_batch_list()
957 if (head->batch_head) { in stripe_add_to_batch_list()
958 spin_lock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
961 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
968 * this stripe->batch_head doesn't get assigned, which in stripe_add_to_batch_list()
971 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
977 list_add(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
978 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
980 head->batch_head = head; in stripe_add_to_batch_list()
981 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
982 spin_lock(&head->batch_lock); in stripe_add_to_batch_list()
983 list_add_tail(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
984 spin_unlock(&head->batch_lock); in stripe_add_to_batch_list()
987 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in stripe_add_to_batch_list()
988 if (atomic_dec_return(&conf->preread_active_stripes) in stripe_add_to_batch_list()
990 md_wakeup_thread(conf->mddev->thread); in stripe_add_to_batch_list()
992 if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { in stripe_add_to_batch_list()
993 int seq = sh->bm_seq; in stripe_add_to_batch_list()
994 if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && in stripe_add_to_batch_list()
995 sh->batch_head->bm_seq > seq) in stripe_add_to_batch_list()
996 seq = sh->batch_head->bm_seq; in stripe_add_to_batch_list()
997 set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); in stripe_add_to_batch_list()
998 sh->batch_head->bm_seq = seq; in stripe_add_to_batch_list()
1001 atomic_inc(&sh->count); in stripe_add_to_batch_list()
1011 static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) in use_new_offset() argument
1013 sector_t progress = conf->reshape_progress; in use_new_offset()
1015 * of conf->generation, or ->data_offset that was set before in use_new_offset()
1021 if (sh->generation == conf->generation - 1) in use_new_offset()
1023 /* We are in a reshape, and this is a new-generation stripe, in use_new_offset()
1044 if (da->sector > db->sector) in cmp_stripe()
1046 if (da->sector < db->sector) in cmp_stripe()
1047 return -1; in cmp_stripe()
1051 static void dispatch_defer_bios(struct r5conf *conf, int target, in dispatch_defer_bios() argument
1058 if (conf->pending_data_cnt == 0) in dispatch_defer_bios()
1061 list_sort(NULL, &conf->pending_list, cmp_stripe); in dispatch_defer_bios()
1063 first = conf->pending_list.next; in dispatch_defer_bios()
1066 if (conf->next_pending_data) in dispatch_defer_bios()
1067 list_move_tail(&conf->pending_list, in dispatch_defer_bios()
1068 &conf->next_pending_data->sibling); in dispatch_defer_bios()
1070 while (!list_empty(&conf->pending_list)) { in dispatch_defer_bios()
1071 data = list_first_entry(&conf->pending_list, in dispatch_defer_bios()
1073 if (&data->sibling == first) in dispatch_defer_bios()
1074 first = data->sibling.next; in dispatch_defer_bios()
1075 next = data->sibling.next; in dispatch_defer_bios()
1077 bio_list_merge(list, &data->bios); in dispatch_defer_bios()
1078 list_move(&data->sibling, &conf->free_list); in dispatch_defer_bios()
1083 conf->pending_data_cnt -= cnt; in dispatch_defer_bios()
1084 BUG_ON(conf->pending_data_cnt < 0 || cnt < target); in dispatch_defer_bios()
1086 if (next != &conf->pending_list) in dispatch_defer_bios()
1087 conf->next_pending_data = list_entry(next, in dispatch_defer_bios()
1090 conf->next_pending_data = NULL; in dispatch_defer_bios()
1092 if (first != &conf->pending_list) in dispatch_defer_bios()
1093 list_move_tail(&conf->pending_list, first); in dispatch_defer_bios()
1096 static void flush_deferred_bios(struct r5conf *conf) in flush_deferred_bios() argument
1100 if (conf->pending_data_cnt == 0) in flush_deferred_bios()
1103 spin_lock(&conf->pending_bios_lock); in flush_deferred_bios()
1104 dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp); in flush_deferred_bios()
1105 BUG_ON(conf->pending_data_cnt != 0); in flush_deferred_bios()
1106 spin_unlock(&conf->pending_bios_lock); in flush_deferred_bios()
1111 static void defer_issue_bios(struct r5conf *conf, sector_t sector, in defer_issue_bios() argument
1117 spin_lock(&conf->pending_bios_lock); in defer_issue_bios()
1118 ent = list_first_entry(&conf->free_list, struct r5pending_data, in defer_issue_bios()
1120 list_move_tail(&ent->sibling, &conf->pending_list); in defer_issue_bios()
1121 ent->sector = sector; in defer_issue_bios()
1122 bio_list_init(&ent->bios); in defer_issue_bios()
1123 bio_list_merge(&ent->bios, bios); in defer_issue_bios()
1124 conf->pending_data_cnt++; in defer_issue_bios()
1125 if (conf->pending_data_cnt >= PENDING_IO_MAX) in defer_issue_bios()
1126 dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp); in defer_issue_bios()
1128 spin_unlock(&conf->pending_bios_lock); in defer_issue_bios()
1140 struct r5conf *conf = sh->raid_conf; in ops_run_io() local
1141 int i, disks = sh->disks; in ops_run_io()
1152 should_defer = conf->batch_bio_dispatch && conf->group_cnt; in ops_run_io()
1154 for (i = disks; i--; ) { in ops_run_io()
1162 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { in ops_run_io()
1164 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) in ops_run_io()
1166 if (test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_io()
1168 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) in ops_run_io()
1171 &sh->dev[i].flags)) { in ops_run_io()
1176 if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags)) in ops_run_io()
1180 dev = &sh->dev[i]; in ops_run_io()
1181 bi = &dev->req; in ops_run_io()
1182 rbi = &dev->rreq; /* For writing to replacement */ in ops_run_io()
1185 rrdev = rcu_dereference(conf->disks[i].replacement); in ops_run_io()
1187 rdev = rcu_dereference(conf->disks[i].rdev); in ops_run_io()
1199 if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev) in ops_run_io()
1204 if (rdev && test_bit(Faulty, &rdev->flags)) in ops_run_io()
1207 atomic_inc(&rdev->nr_pending); in ops_run_io()
1208 if (rrdev && test_bit(Faulty, &rrdev->flags)) in ops_run_io()
1211 atomic_inc(&rrdev->nr_pending); in ops_run_io()
1219 test_bit(WriteErrorSeen, &rdev->flags)) { in ops_run_io()
1222 int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in ops_run_io()
1228 set_bit(BlockedBadBlocks, &rdev->flags); in ops_run_io()
1229 if (!conf->mddev->external && in ops_run_io()
1230 conf->mddev->sb_flags) { in ops_run_io()
1233 * bad block log - better give it in ops_run_io()
1235 md_check_recovery(conf->mddev); in ops_run_io()
1242 atomic_inc(&rdev->nr_pending); in ops_run_io()
1243 md_wait_for_blocked_rdev(rdev, conf->mddev); in ops_run_io()
1245 /* Acknowledged bad block - skip the write */ in ops_run_io()
1246 rdev_dec_pending(rdev, conf->mddev); in ops_run_io()
1252 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1253 || s->replacing) in ops_run_io()
1254 md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1256 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1258 bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags); in ops_run_io()
1259 bi->bi_end_io = op_is_write(op) in ops_run_io()
1262 bi->bi_private = sh; in ops_run_io()
1265 __func__, (unsigned long long)sh->sector, in ops_run_io()
1266 bi->bi_opf, i); in ops_run_io()
1267 atomic_inc(&sh->count); in ops_run_io()
1269 atomic_inc(&head_sh->count); in ops_run_io()
1270 if (use_new_offset(conf, sh)) in ops_run_io()
1271 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1272 + rdev->new_data_offset); in ops_run_io()
1274 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1275 + rdev->data_offset); in ops_run_io()
1276 if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags)) in ops_run_io()
1277 bi->bi_opf |= REQ_NOMERGE; in ops_run_io()
1279 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1280 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1283 test_bit(R5_InJournal, &sh->dev[i].flags)) in ops_run_io()
1289 sh->dev[i].vec.bv_page = sh->dev[i].orig_page; in ops_run_io()
1291 sh->dev[i].vec.bv_page = sh->dev[i].page; in ops_run_io()
1292 bi->bi_vcnt = 1; in ops_run_io()
1293 bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1294 bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1295 bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1301 bi->bi_vcnt = 0; in ops_run_io()
1303 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); in ops_run_io()
1305 if (conf->mddev->gendisk) in ops_run_io()
1307 disk_devt(conf->mddev->gendisk), in ops_run_io()
1308 sh->dev[i].sector); in ops_run_io()
1315 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1316 || s->replacing) in ops_run_io()
1317 md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1319 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1321 bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags); in ops_run_io()
1323 rbi->bi_end_io = raid5_end_write_request; in ops_run_io()
1324 rbi->bi_private = sh; in ops_run_io()
1328 __func__, (unsigned long long)sh->sector, in ops_run_io()
1329 rbi->bi_opf, i); in ops_run_io()
1330 atomic_inc(&sh->count); in ops_run_io()
1332 atomic_inc(&head_sh->count); in ops_run_io()
1333 if (use_new_offset(conf, sh)) in ops_run_io()
1334 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1335 + rrdev->new_data_offset); in ops_run_io()
1337 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1338 + rrdev->data_offset); in ops_run_io()
1339 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1340 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1341 sh->dev[i].rvec.bv_page = sh->dev[i].page; in ops_run_io()
1342 rbi->bi_vcnt = 1; in ops_run_io()
1343 rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1344 rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1345 rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1351 rbi->bi_vcnt = 0; in ops_run_io()
1352 if (conf->mddev->gendisk) in ops_run_io()
1354 disk_devt(conf->mddev->gendisk), in ops_run_io()
1355 sh->dev[i].sector); in ops_run_io()
1363 set_bit(STRIPE_DEGRADED, &sh->state); in ops_run_io()
1365 bi->bi_opf, i, (unsigned long long)sh->sector); in ops_run_io()
1366 clear_bit(R5_LOCKED, &sh->dev[i].flags); in ops_run_io()
1367 set_bit(STRIPE_HANDLE, &sh->state); in ops_run_io()
1370 if (!head_sh->batch_head) in ops_run_io()
1372 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_io()
1379 defer_issue_bios(conf, head_sh->sector, &pending_bios); in ops_run_io()
1384 unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, in async_copy_data() argument
1393 struct r5conf *conf = sh->raid_conf; in async_copy_data() local
1395 if (bio->bi_iter.bi_sector >= sector) in async_copy_data()
1396 page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512; in async_copy_data()
1398 page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512; in async_copy_data()
1402 init_async_submit(&submit, flags, tx, NULL, NULL, NULL); in async_copy_data()
1410 b_offset = -page_offset; in async_copy_data()
1412 len -= b_offset; in async_copy_data()
1415 if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf)) in async_copy_data()
1416 clen = RAID5_STRIPE_SIZE(conf) - page_offset; in async_copy_data()
1424 if (conf->skip_copy && in async_copy_data()
1426 clen == RAID5_STRIPE_SIZE(conf) && in async_copy_data()
1430 tx = async_memcpy(*page, bio_page, page_offset + poff, in async_copy_data()
1433 tx = async_memcpy(bio_page, *page, b_offset, in async_copy_data()
1437 submit.depend_tx = tx; in async_copy_data()
1444 return tx; in async_copy_data()
1451 struct r5conf *conf = sh->raid_conf; in ops_complete_biofill() local
1454 (unsigned long long)sh->sector); in ops_complete_biofill()
1457 for (i = sh->disks; i--; ) { in ops_complete_biofill()
1458 struct r5dev *dev = &sh->dev[i]; in ops_complete_biofill()
1465 if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { in ops_complete_biofill()
1468 BUG_ON(!dev->read); in ops_complete_biofill()
1469 rbi = dev->read; in ops_complete_biofill()
1470 dev->read = NULL; in ops_complete_biofill()
1471 while (rbi && rbi->bi_iter.bi_sector < in ops_complete_biofill()
1472 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_complete_biofill()
1473 rbi2 = r5_next_bio(conf, rbi, dev->sector); in ops_complete_biofill()
1479 clear_bit(STRIPE_BIOFILL_RUN, &sh->state); in ops_complete_biofill()
1481 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_biofill()
1487 struct dma_async_tx_descriptor *tx = NULL; in ops_run_biofill() local
1490 struct r5conf *conf = sh->raid_conf; in ops_run_biofill() local
1492 BUG_ON(sh->batch_head); in ops_run_biofill()
1494 (unsigned long long)sh->sector); in ops_run_biofill()
1496 for (i = sh->disks; i--; ) { in ops_run_biofill()
1497 struct r5dev *dev = &sh->dev[i]; in ops_run_biofill()
1498 if (test_bit(R5_Wantfill, &dev->flags)) { in ops_run_biofill()
1500 spin_lock_irq(&sh->stripe_lock); in ops_run_biofill()
1501 dev->read = rbi = dev->toread; in ops_run_biofill()
1502 dev->toread = NULL; in ops_run_biofill()
1503 spin_unlock_irq(&sh->stripe_lock); in ops_run_biofill()
1504 while (rbi && rbi->bi_iter.bi_sector < in ops_run_biofill()
1505 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biofill()
1506 tx = async_copy_data(0, rbi, &dev->page, in ops_run_biofill()
1507 dev->offset, in ops_run_biofill()
1508 dev->sector, tx, sh, 0); in ops_run_biofill()
1509 rbi = r5_next_bio(conf, rbi, dev->sector); in ops_run_biofill()
1514 atomic_inc(&sh->count); in ops_run_biofill()
1515 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); in ops_run_biofill()
1526 tgt = &sh->dev[target]; in mark_target_uptodate()
1527 set_bit(R5_UPTODATE, &tgt->flags); in mark_target_uptodate()
1528 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in mark_target_uptodate()
1529 clear_bit(R5_Wantcompute, &tgt->flags); in mark_target_uptodate()
1537 (unsigned long long)sh->sector); in ops_complete_compute()
1540 mark_target_uptodate(sh, sh->ops.target); in ops_complete_compute()
1541 mark_target_uptodate(sh, sh->ops.target2); in ops_complete_compute()
1543 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); in ops_complete_compute()
1544 if (sh->check_state == check_state_compute_run) in ops_complete_compute()
1545 sh->check_state = check_state_compute_result; in ops_complete_compute()
1546 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_compute()
1553 return percpu->scribble + i * percpu->scribble_obj_size; in to_addr_page()
1560 return (void *) (to_addr_page(percpu, i) + sh->disks + 2); in to_addr_conv()
1569 return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); in to_addr_offs()
1575 int disks = sh->disks; in ops_run_compute5()
1578 int target = sh->ops.target; in ops_run_compute5()
1579 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute5()
1580 struct page *xor_dest = tgt->page; in ops_run_compute5()
1581 unsigned int off_dest = tgt->offset; in ops_run_compute5()
1583 struct dma_async_tx_descriptor *tx; in ops_run_compute5() local
1587 BUG_ON(sh->batch_head); in ops_run_compute5()
1590 __func__, (unsigned long long)sh->sector, target); in ops_run_compute5()
1591 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute5()
1593 for (i = disks; i--; ) { in ops_run_compute5()
1595 off_srcs[count] = sh->dev[i].offset; in ops_run_compute5()
1596 xor_srcs[count++] = sh->dev[i].page; in ops_run_compute5()
1600 atomic_inc(&sh->count); in ops_run_compute5()
1605 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_compute5()
1606 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1608 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_compute5()
1609 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1611 return tx; in ops_run_compute5()
1614 /* set_syndrome_sources - populate source buffers for gen_syndrome
1615 * @srcs - (struct page *) array of size sh->disks
1616 * @offs - (unsigned int) array of offsets, one for each page
1617 * @sh - stripe_head to parse
1629 int disks = sh->disks; in set_syndrome_sources()
1630 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); in set_syndrome_sources()
1642 struct r5dev *dev = &sh->dev[i]; in set_syndrome_sources()
1644 if (i == sh->qd_idx || i == sh->pd_idx || in set_syndrome_sources()
1647 (test_bit(R5_Wantdrain, &dev->flags) || in set_syndrome_sources()
1648 test_bit(R5_InJournal, &dev->flags))) || in set_syndrome_sources()
1650 (dev->written || in set_syndrome_sources()
1651 test_bit(R5_InJournal, &dev->flags)))) { in set_syndrome_sources()
1652 if (test_bit(R5_InJournal, &dev->flags)) in set_syndrome_sources()
1653 srcs[slot] = sh->dev[i].orig_page; in set_syndrome_sources()
1655 srcs[slot] = sh->dev[i].page; in set_syndrome_sources()
1661 offs[slot] = sh->dev[i].offset; in set_syndrome_sources()
1672 int disks = sh->disks; in ops_run_compute6_1()
1676 int qd_idx = sh->qd_idx; in ops_run_compute6_1()
1677 struct dma_async_tx_descriptor *tx; in ops_run_compute6_1() local
1685 BUG_ON(sh->batch_head); in ops_run_compute6_1()
1686 if (sh->ops.target < 0) in ops_run_compute6_1()
1687 target = sh->ops.target2; in ops_run_compute6_1()
1688 else if (sh->ops.target2 < 0) in ops_run_compute6_1()
1689 target = sh->ops.target; in ops_run_compute6_1()
1695 __func__, (unsigned long long)sh->sector, target); in ops_run_compute6_1()
1697 tgt = &sh->dev[target]; in ops_run_compute6_1()
1698 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_1()
1699 dest = tgt->page; in ops_run_compute6_1()
1700 dest_off = tgt->offset; in ops_run_compute6_1()
1702 atomic_inc(&sh->count); in ops_run_compute6_1()
1711 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_compute6_1()
1712 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1714 /* Compute any data- or p-drive using XOR */ in ops_run_compute6_1()
1716 for (i = disks; i-- ; ) { in ops_run_compute6_1()
1719 offs[count] = sh->dev[i].offset; in ops_run_compute6_1()
1720 blocks[count++] = sh->dev[i].page; in ops_run_compute6_1()
1726 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_1()
1727 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1730 return tx; in ops_run_compute6_1()
1736 int i, count, disks = sh->disks; in ops_run_compute6_2()
1737 int syndrome_disks = sh->ddf_layout ? disks : disks-2; in ops_run_compute6_2()
1739 int faila = -1, failb = -1; in ops_run_compute6_2()
1740 int target = sh->ops.target; in ops_run_compute6_2()
1741 int target2 = sh->ops.target2; in ops_run_compute6_2()
1742 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute6_2()
1743 struct r5dev *tgt2 = &sh->dev[target2]; in ops_run_compute6_2()
1744 struct dma_async_tx_descriptor *tx; in ops_run_compute6_2() local
1749 BUG_ON(sh->batch_head); in ops_run_compute6_2()
1751 __func__, (unsigned long long)sh->sector, target, target2); in ops_run_compute6_2()
1753 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_2()
1754 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); in ops_run_compute6_2()
1756 /* we need to open-code set_syndrome_sources to handle the in ops_run_compute6_2()
1768 offs[slot] = sh->dev[i].offset; in ops_run_compute6_2()
1769 blocks[slot] = sh->dev[i].page; in ops_run_compute6_2()
1782 __func__, (unsigned long long)sh->sector, faila, failb); in ops_run_compute6_2()
1784 atomic_inc(&sh->count); in ops_run_compute6_2()
1794 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1800 int qd_idx = sh->qd_idx; in ops_run_compute6_2()
1809 for (i = disks; i-- ; ) { in ops_run_compute6_2()
1812 offs[count] = sh->dev[i].offset; in ops_run_compute6_2()
1813 blocks[count++] = sh->dev[i].page; in ops_run_compute6_2()
1815 dest = sh->dev[data_target].page; in ops_run_compute6_2()
1816 dest_off = sh->dev[data_target].offset; in ops_run_compute6_2()
1821 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_2()
1822 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1826 init_async_submit(&submit, ASYNC_TX_FENCE, tx, in ops_run_compute6_2()
1830 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1840 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1846 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1858 (unsigned long long)sh->sector); in ops_complete_prexor()
1860 if (r5c_is_writeback(sh->raid_conf->log)) in ops_complete_prexor()
1862 * raid5-cache write back uses orig_page during prexor. in ops_complete_prexor()
1870 struct dma_async_tx_descriptor *tx) in ops_run_prexor5() argument
1872 int disks = sh->disks; in ops_run_prexor5()
1875 int count = 0, pd_idx = sh->pd_idx, i; in ops_run_prexor5()
1879 unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_prexor5()
1880 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_prexor5()
1882 BUG_ON(sh->batch_head); in ops_run_prexor5()
1884 (unsigned long long)sh->sector); in ops_run_prexor5()
1886 for (i = disks; i--; ) { in ops_run_prexor5()
1887 struct r5dev *dev = &sh->dev[i]; in ops_run_prexor5()
1889 if (test_bit(R5_InJournal, &dev->flags)) { in ops_run_prexor5()
1894 off_srcs[count] = dev->offset; in ops_run_prexor5()
1895 xor_srcs[count++] = dev->orig_page; in ops_run_prexor5()
1896 } else if (test_bit(R5_Wantdrain, &dev->flags)) { in ops_run_prexor5()
1897 off_srcs[count] = dev->offset; in ops_run_prexor5()
1898 xor_srcs[count++] = dev->page; in ops_run_prexor5()
1902 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, in ops_run_prexor5()
1904 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_prexor5()
1905 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor5()
1907 return tx; in ops_run_prexor5()
1912 struct dma_async_tx_descriptor *tx) in ops_run_prexor6() argument
1920 (unsigned long long)sh->sector); in ops_run_prexor6()
1924 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, in ops_run_prexor6()
1926 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_prexor6()
1927 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor6()
1929 return tx; in ops_run_prexor6()
1933 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) in ops_run_biodrain() argument
1935 struct r5conf *conf = sh->raid_conf; in ops_run_biodrain() local
1936 int disks = sh->disks; in ops_run_biodrain()
1941 (unsigned long long)sh->sector); in ops_run_biodrain()
1943 for (i = disks; i--; ) { in ops_run_biodrain()
1948 if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) { in ops_run_biodrain()
1952 dev = &sh->dev[i]; in ops_run_biodrain()
1957 clear_bit(R5_InJournal, &dev->flags); in ops_run_biodrain()
1958 spin_lock_irq(&sh->stripe_lock); in ops_run_biodrain()
1959 chosen = dev->towrite; in ops_run_biodrain()
1960 dev->towrite = NULL; in ops_run_biodrain()
1961 sh->overwrite_disks = 0; in ops_run_biodrain()
1962 BUG_ON(dev->written); in ops_run_biodrain()
1963 wbi = dev->written = chosen; in ops_run_biodrain()
1964 spin_unlock_irq(&sh->stripe_lock); in ops_run_biodrain()
1965 WARN_ON(dev->page != dev->orig_page); in ops_run_biodrain()
1967 while (wbi && wbi->bi_iter.bi_sector < in ops_run_biodrain()
1968 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biodrain()
1969 if (wbi->bi_opf & REQ_FUA) in ops_run_biodrain()
1970 set_bit(R5_WantFUA, &dev->flags); in ops_run_biodrain()
1971 if (wbi->bi_opf & REQ_SYNC) in ops_run_biodrain()
1972 set_bit(R5_SyncIO, &dev->flags); in ops_run_biodrain()
1974 set_bit(R5_Discard, &dev->flags); in ops_run_biodrain()
1976 tx = async_copy_data(1, wbi, &dev->page, in ops_run_biodrain()
1977 dev->offset, in ops_run_biodrain()
1978 dev->sector, tx, sh, in ops_run_biodrain()
1979 r5c_is_writeback(conf->log)); in ops_run_biodrain()
1980 if (dev->page != dev->orig_page && in ops_run_biodrain()
1981 !r5c_is_writeback(conf->log)) { in ops_run_biodrain()
1982 set_bit(R5_SkipCopy, &dev->flags); in ops_run_biodrain()
1983 clear_bit(R5_UPTODATE, &dev->flags); in ops_run_biodrain()
1984 clear_bit(R5_OVERWRITE, &dev->flags); in ops_run_biodrain()
1987 wbi = r5_next_bio(conf, wbi, dev->sector); in ops_run_biodrain()
1990 if (head_sh->batch_head) { in ops_run_biodrain()
1991 sh = list_first_entry(&sh->batch_list, in ops_run_biodrain()
2001 return tx; in ops_run_biodrain()
2007 int disks = sh->disks; in ops_complete_reconstruct()
2008 int pd_idx = sh->pd_idx; in ops_complete_reconstruct()
2009 int qd_idx = sh->qd_idx; in ops_complete_reconstruct()
2014 (unsigned long long)sh->sector); in ops_complete_reconstruct()
2016 for (i = disks; i--; ) { in ops_complete_reconstruct()
2017 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); in ops_complete_reconstruct()
2018 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); in ops_complete_reconstruct()
2019 discard |= test_bit(R5_Discard, &sh->dev[i].flags); in ops_complete_reconstruct()
2022 for (i = disks; i--; ) { in ops_complete_reconstruct()
2023 struct r5dev *dev = &sh->dev[i]; in ops_complete_reconstruct()
2025 if (dev->written || i == pd_idx || i == qd_idx) { in ops_complete_reconstruct()
2026 if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { in ops_complete_reconstruct()
2027 set_bit(R5_UPTODATE, &dev->flags); in ops_complete_reconstruct()
2028 if (test_bit(STRIPE_EXPAND_READY, &sh->state)) in ops_complete_reconstruct()
2029 set_bit(R5_Expanded, &dev->flags); in ops_complete_reconstruct()
2032 set_bit(R5_WantFUA, &dev->flags); in ops_complete_reconstruct()
2034 set_bit(R5_SyncIO, &dev->flags); in ops_complete_reconstruct()
2038 if (sh->reconstruct_state == reconstruct_state_drain_run) in ops_complete_reconstruct()
2039 sh->reconstruct_state = reconstruct_state_drain_result; in ops_complete_reconstruct()
2040 else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) in ops_complete_reconstruct()
2041 sh->reconstruct_state = reconstruct_state_prexor_drain_result; in ops_complete_reconstruct()
2043 BUG_ON(sh->reconstruct_state != reconstruct_state_run); in ops_complete_reconstruct()
2044 sh->reconstruct_state = reconstruct_state_result; in ops_complete_reconstruct()
2047 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_reconstruct()
2053 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct5() argument
2055 int disks = sh->disks; in ops_run_reconstruct5()
2059 int count, pd_idx = sh->pd_idx, i; in ops_run_reconstruct5()
2069 (unsigned long long)sh->sector); in ops_run_reconstruct5()
2071 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct5()
2074 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct5()
2077 if (i >= sh->disks) { in ops_run_reconstruct5()
2078 atomic_inc(&sh->count); in ops_run_reconstruct5()
2079 set_bit(R5_Discard, &sh->dev[pd_idx].flags); in ops_run_reconstruct5()
2088 * that are part of a read-modify-write (written) in ops_run_reconstruct5()
2090 if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct5()
2092 off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2093 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2094 for (i = disks; i--; ) { in ops_run_reconstruct5()
2095 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2096 if (head_sh->dev[i].written || in ops_run_reconstruct5()
2097 test_bit(R5_InJournal, &head_sh->dev[i].flags)) { in ops_run_reconstruct5()
2098 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2099 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2103 xor_dest = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2104 off_dest = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2105 for (i = disks; i--; ) { in ops_run_reconstruct5()
2106 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2108 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2109 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2119 last_stripe = !head_sh->batch_head || in ops_run_reconstruct5()
2120 list_first_entry(&sh->batch_list, in ops_run_reconstruct5()
2126 atomic_inc(&head_sh->count); in ops_run_reconstruct5()
2127 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh, in ops_run_reconstruct5()
2131 init_async_submit(&submit, flags, tx, NULL, NULL, in ops_run_reconstruct5()
2136 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_reconstruct5()
2137 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2139 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_reconstruct5()
2140 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2143 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct5()
2151 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct6() argument
2162 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); in ops_run_reconstruct6()
2164 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct6()
2165 if (sh->pd_idx == i || sh->qd_idx == i) in ops_run_reconstruct6()
2167 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct6()
2170 if (i >= sh->disks) { in ops_run_reconstruct6()
2171 atomic_inc(&sh->count); in ops_run_reconstruct6()
2172 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in ops_run_reconstruct6()
2173 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in ops_run_reconstruct6()
2182 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct6()
2191 last_stripe = !head_sh->batch_head || in ops_run_reconstruct6()
2192 list_first_entry(&sh->batch_list, in ops_run_reconstruct6()
2196 atomic_inc(&head_sh->count); in ops_run_reconstruct6()
2197 init_async_submit(&submit, txflags, tx, ops_complete_reconstruct, in ops_run_reconstruct6()
2200 init_async_submit(&submit, 0, tx, NULL, NULL, in ops_run_reconstruct6()
2202 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_reconstruct6()
2203 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct6()
2206 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct6()
2217 (unsigned long long)sh->sector); in ops_complete_check()
2219 sh->check_state = check_state_check_result; in ops_complete_check()
2220 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_check()
2226 int disks = sh->disks; in ops_run_check_p()
2227 int pd_idx = sh->pd_idx; in ops_run_check_p()
2228 int qd_idx = sh->qd_idx; in ops_run_check_p()
2233 struct dma_async_tx_descriptor *tx; in ops_run_check_p() local
2239 (unsigned long long)sh->sector); in ops_run_check_p()
2241 BUG_ON(sh->batch_head); in ops_run_check_p()
2243 xor_dest = sh->dev[pd_idx].page; in ops_run_check_p()
2244 off_dest = sh->dev[pd_idx].offset; in ops_run_check_p()
2247 for (i = disks; i--; ) { in ops_run_check_p()
2250 off_srcs[count] = sh->dev[i].offset; in ops_run_check_p()
2251 xor_srcs[count++] = sh->dev[i].page; in ops_run_check_p()
2256 tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_check_p()
2257 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_p()
2258 &sh->ops.zero_sum_result, &submit); in ops_run_check_p()
2260 atomic_inc(&sh->count); in ops_run_check_p()
2261 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); in ops_run_check_p()
2262 tx = async_trigger_callback(&submit); in ops_run_check_p()
2273 (unsigned long long)sh->sector, checkp); in ops_run_check_pq()
2275 BUG_ON(sh->batch_head); in ops_run_check_pq()
2280 atomic_inc(&sh->count); in ops_run_check_pq()
2284 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_pq()
2285 &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); in ops_run_check_pq()
2290 int overlap_clear = 0, i, disks = sh->disks; in raid_run_ops()
2291 struct dma_async_tx_descriptor *tx = NULL; in raid_run_ops() local
2292 struct r5conf *conf = sh->raid_conf; in raid_run_ops() local
2293 int level = conf->level; in raid_run_ops()
2296 local_lock(&conf->percpu->lock); in raid_run_ops()
2297 percpu = this_cpu_ptr(conf->percpu); in raid_run_ops()
2305 tx = ops_run_compute5(sh, percpu); in raid_run_ops()
2307 if (sh->ops.target2 < 0 || sh->ops.target < 0) in raid_run_ops()
2308 tx = ops_run_compute6_1(sh, percpu); in raid_run_ops()
2310 tx = ops_run_compute6_2(sh, percpu); in raid_run_ops()
2313 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) in raid_run_ops()
2314 async_tx_ack(tx); in raid_run_ops()
2319 tx = ops_run_prexor5(sh, percpu, tx); in raid_run_ops()
2321 tx = ops_run_prexor6(sh, percpu, tx); in raid_run_ops()
2325 tx = ops_run_partial_parity(sh, percpu, tx); in raid_run_ops()
2328 tx = ops_run_biodrain(sh, tx); in raid_run_ops()
2334 ops_run_reconstruct5(sh, percpu, tx); in raid_run_ops()
2336 ops_run_reconstruct6(sh, percpu, tx); in raid_run_ops()
2340 if (sh->check_state == check_state_run) in raid_run_ops()
2342 else if (sh->check_state == check_state_run_q) in raid_run_ops()
2344 else if (sh->check_state == check_state_run_pq) in raid_run_ops()
2350 if (overlap_clear && !sh->batch_head) { in raid_run_ops()
2351 for (i = disks; i--; ) { in raid_run_ops()
2352 struct r5dev *dev = &sh->dev[i]; in raid_run_ops()
2353 if (test_and_clear_bit(R5_Overlap, &dev->flags)) in raid_run_ops()
2354 wake_up(&sh->raid_conf->wait_for_overlap); in raid_run_ops()
2357 local_unlock(&conf->percpu->lock); in raid_run_ops()
2363 kfree(sh->pages); in free_stripe()
2365 if (sh->ppl_page) in free_stripe()
2366 __free_page(sh->ppl_page); in free_stripe()
2371 int disks, struct r5conf *conf) in alloc_stripe() argument
2377 spin_lock_init(&sh->stripe_lock); in alloc_stripe()
2378 spin_lock_init(&sh->batch_lock); in alloc_stripe()
2379 INIT_LIST_HEAD(&sh->batch_list); in alloc_stripe()
2380 INIT_LIST_HEAD(&sh->lru); in alloc_stripe()
2381 INIT_LIST_HEAD(&sh->r5c); in alloc_stripe()
2382 INIT_LIST_HEAD(&sh->log_list); in alloc_stripe()
2383 atomic_set(&sh->count, 1); in alloc_stripe()
2384 sh->raid_conf = conf; in alloc_stripe()
2385 sh->log_start = MaxSector; in alloc_stripe()
2387 if (raid5_has_ppl(conf)) { in alloc_stripe()
2388 sh->ppl_page = alloc_page(gfp); in alloc_stripe()
2389 if (!sh->ppl_page) { in alloc_stripe()
2395 if (init_stripe_shared_pages(sh, conf, disks)) { in alloc_stripe()
2403 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) in grow_one_stripe() argument
2407 sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf); in grow_one_stripe()
2413 free_stripe(conf->slab_cache, sh); in grow_one_stripe()
2416 sh->hash_lock_index = in grow_one_stripe()
2417 conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; in grow_one_stripe()
2419 atomic_inc(&conf->active_stripes); in grow_one_stripe()
2422 WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); in grow_one_stripe()
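/*
 * Illustrative sketch (not part of raid5.c): grow_one_stripe() above spreads
 * newly allocated stripes round-robin over the hash-lock buckets by taking
 * max_nr_stripes % NR_STRIPE_HASH_LOCKS, while drop_one_stripe() later frees
 * from bucket (max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK.  The two agree
 * on the assumption that NR_STRIPE_HASH_LOCKS is a power of two, so "% N"
 * and "& (N - 1)" pick the same bucket.  Helper names here are hypothetical.
 */
#include <assert.h>

#define NR_LOCKS 8	/* stands in for NR_STRIPE_HASH_LOCKS (assumed value) */

static int bucket_for_next_stripe(int nr_stripes)
{
	return nr_stripes % NR_LOCKS;			/* as in grow_one_stripe() */
}

static int bucket_for_last_stripe(int nr_stripes)
{
	return (nr_stripes - 1) & (NR_LOCKS - 1);	/* as in drop_one_stripe() */
}

int main(void)
{
	/* growing to n stripes and then dropping one touches the same bucket */
	for (int n = 1; n <= 64; n++)
		assert(bucket_for_next_stripe(n - 1) == bucket_for_last_stripe(n));
	return 0;
}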
2426 static int grow_stripes(struct r5conf *conf, int num) in grow_stripes() argument
2429 size_t namelen = sizeof(conf->cache_name[0]); in grow_stripes()
2430 int devs = max(conf->raid_disks, conf->previous_raid_disks); in grow_stripes()
2432 if (conf->mddev->gendisk) in grow_stripes()
2433 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2434 "raid%d-%s", conf->level, mdname(conf->mddev)); in grow_stripes()
2436 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2437 "raid%d-%p", conf->level, conf->mddev); in grow_stripes()
2438 snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); in grow_stripes()
2440 conf->active_name = 0; in grow_stripes()
2441 sc = kmem_cache_create(conf->cache_name[conf->active_name], in grow_stripes()
2446 conf->slab_cache = sc; in grow_stripes()
2447 conf->pool_size = devs; in grow_stripes()
2448 while (num--) in grow_stripes()
2449 if (!grow_one_stripe(conf, GFP_KERNEL)) in grow_stripes()
2456 * scribble_alloc - allocate percpu scribble buffer for required size
2487 return -ENOMEM; in scribble_alloc()
2489 kvfree(percpu->scribble); in scribble_alloc()
2491 percpu->scribble = scribble; in scribble_alloc()
2492 percpu->scribble_obj_size = obj_size; in scribble_alloc()
2496 static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) in resize_chunks() argument
2506 if (conf->scribble_disks >= new_disks && in resize_chunks()
2507 conf->scribble_sectors >= new_sectors) in resize_chunks()
2509 mddev_suspend(conf->mddev); in resize_chunks()
2515 percpu = per_cpu_ptr(conf->percpu, cpu); in resize_chunks()
2517 new_sectors / RAID5_STRIPE_SECTORS(conf)); in resize_chunks()
2523 mddev_resume(conf->mddev); in resize_chunks()
2525 conf->scribble_disks = new_disks; in resize_chunks()
2526 conf->scribble_sectors = new_sectors; in resize_chunks()
2531 static int resize_stripes(struct r5conf *conf, int newsize) in resize_stripes() argument
 2545			 * 3/ reallocate conf->disks to be suitably bigger. If this fails, in resize_stripes()
 2546			 * we simply return a failure status - no need to clean anything up. in resize_stripes()
2564 md_allow_write(conf->mddev); in resize_stripes()
2567 sc = kmem_cache_create(conf->cache_name[1-conf->active_name], in resize_stripes()
2571 return -ENOMEM; in resize_stripes()
2573 /* Need to ensure auto-resizing doesn't interfere */ in resize_stripes()
2574 mutex_lock(&conf->cache_size_mutex); in resize_stripes()
2576 for (i = conf->max_nr_stripes; i; i--) { in resize_stripes()
2577 nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); in resize_stripes()
2581 list_add(&nsh->lru, &newstripes); in resize_stripes()
2587 list_del(&nsh->lru); in resize_stripes()
2591 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2592 return -ENOMEM; in resize_stripes()
2594 /* Step 2 - Must use GFP_NOIO now. in resize_stripes()
2601 lock_device_hash_lock(conf, hash); in resize_stripes()
2602 wait_event_cmd(conf->wait_for_stripe, in resize_stripes()
2603 !list_empty(conf->inactive_list + hash), in resize_stripes()
2604 unlock_device_hash_lock(conf, hash), in resize_stripes()
2605 lock_device_hash_lock(conf, hash)); in resize_stripes()
2606 osh = get_free_stripe(conf, hash); in resize_stripes()
2607 unlock_device_hash_lock(conf, hash); in resize_stripes()
2610 for (i = 0; i < osh->nr_pages; i++) { in resize_stripes()
2611 nsh->pages[i] = osh->pages[i]; in resize_stripes()
2612 osh->pages[i] = NULL; in resize_stripes()
2615 for(i=0; i<conf->pool_size; i++) { in resize_stripes()
2616 nsh->dev[i].page = osh->dev[i].page; in resize_stripes()
2617 nsh->dev[i].orig_page = osh->dev[i].page; in resize_stripes()
2618 nsh->dev[i].offset = osh->dev[i].offset; in resize_stripes()
2620 nsh->hash_lock_index = hash; in resize_stripes()
2621 free_stripe(conf->slab_cache, osh); in resize_stripes()
2623 if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + in resize_stripes()
2624 !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { in resize_stripes()
2629 kmem_cache_destroy(conf->slab_cache); in resize_stripes()
2634 * conf->disks and the scribble region in resize_stripes()
2638 for (i = 0; i < conf->pool_size; i++) in resize_stripes()
2639 ndisks[i] = conf->disks[i]; in resize_stripes()
2641 for (i = conf->pool_size; i < newsize; i++) { in resize_stripes()
2644 err = -ENOMEM; in resize_stripes()
2648 for (i = conf->pool_size; i < newsize; i++) in resize_stripes()
2653 kfree(conf->disks); in resize_stripes()
2654 conf->disks = ndisks; in resize_stripes()
2657 err = -ENOMEM; in resize_stripes()
2659 conf->slab_cache = sc; in resize_stripes()
2660 conf->active_name = 1-conf->active_name; in resize_stripes()
2665 list_del_init(&nsh->lru); in resize_stripes()
2668 for (i = 0; i < nsh->nr_pages; i++) { in resize_stripes()
2669 if (nsh->pages[i]) in resize_stripes()
2671 nsh->pages[i] = alloc_page(GFP_NOIO); in resize_stripes()
2672 if (!nsh->pages[i]) in resize_stripes()
2673 err = -ENOMEM; in resize_stripes()
2676 for (i = conf->raid_disks; i < newsize; i++) { in resize_stripes()
2677 if (nsh->dev[i].page) in resize_stripes()
2679 nsh->dev[i].page = raid5_get_dev_page(nsh, i); in resize_stripes()
2680 nsh->dev[i].orig_page = nsh->dev[i].page; in resize_stripes()
2681 nsh->dev[i].offset = raid5_get_page_offset(nsh, i); in resize_stripes()
2684 for (i=conf->raid_disks; i < newsize; i++) in resize_stripes()
2685 if (nsh->dev[i].page == NULL) { in resize_stripes()
2687 nsh->dev[i].page = p; in resize_stripes()
2688 nsh->dev[i].orig_page = p; in resize_stripes()
2689 nsh->dev[i].offset = 0; in resize_stripes()
2691 err = -ENOMEM; in resize_stripes()
2699 conf->pool_size = newsize; in resize_stripes()
2700 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2705 static int drop_one_stripe(struct r5conf *conf) in drop_one_stripe() argument
2708 int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; in drop_one_stripe()
2710 spin_lock_irq(conf->hash_locks + hash); in drop_one_stripe()
2711 sh = get_free_stripe(conf, hash); in drop_one_stripe()
2712 spin_unlock_irq(conf->hash_locks + hash); in drop_one_stripe()
2715 BUG_ON(atomic_read(&sh->count)); in drop_one_stripe()
2717 free_stripe(conf->slab_cache, sh); in drop_one_stripe()
2718 atomic_dec(&conf->active_stripes); in drop_one_stripe()
2719 WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); in drop_one_stripe()
2723 static void shrink_stripes(struct r5conf *conf) in shrink_stripes() argument
2725 while (conf->max_nr_stripes && in shrink_stripes()
2726 drop_one_stripe(conf)) in shrink_stripes()
2729 kmem_cache_destroy(conf->slab_cache); in shrink_stripes()
2730 conf->slab_cache = NULL; in shrink_stripes()
2740 atomic_read(&rcu_access_pointer(rdev)->nr_pending)); in rdev_pend_deref()
2752 lockdep_is_held(&mddev->reconfig_mutex)); in rdev_mdlock_deref()
2757 struct stripe_head *sh = bi->bi_private; in raid5_end_read_request()
2758 struct r5conf *conf = sh->raid_conf; in raid5_end_read_request() local
2759 int disks = sh->disks, i; in raid5_end_read_request()
2764 if (bi == &sh->dev[i].req) in raid5_end_read_request()
2768 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_read_request()
2769 bi->bi_status); in raid5_end_read_request()
2774 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2780 rdev = rdev_pend_deref(conf->disks[i].replacement); in raid5_end_read_request()
2782 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_read_request()
2784 if (use_new_offset(conf, sh)) in raid5_end_read_request()
2785 s = sh->sector + rdev->new_data_offset; in raid5_end_read_request()
2787 s = sh->sector + rdev->data_offset; in raid5_end_read_request()
2788 if (!bi->bi_status) { in raid5_end_read_request()
2789 set_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2790 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in raid5_end_read_request()
2797 mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf), in raid5_end_read_request()
2799 rdev->bdev); in raid5_end_read_request()
2800 atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors); in raid5_end_read_request()
2801 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2802 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2803 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2804 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2806 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in raid5_end_read_request()
2811 set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); in raid5_end_read_request()
2813 if (atomic_read(&rdev->read_errors)) in raid5_end_read_request()
2814 atomic_set(&rdev->read_errors, 0); in raid5_end_read_request()
2819 clear_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2820 if (!(bi->bi_status == BLK_STS_PROTECTION)) in raid5_end_read_request()
2821 atomic_inc(&rdev->read_errors); in raid5_end_read_request()
2822 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2825 mdname(conf->mddev), in raid5_end_read_request()
2827 rdev->bdev); in raid5_end_read_request()
2828 else if (conf->mddev->degraded >= conf->max_degraded) { in raid5_end_read_request()
2832 mdname(conf->mddev), in raid5_end_read_request()
2834 rdev->bdev); in raid5_end_read_request()
2835 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { in raid5_end_read_request()
2840 mdname(conf->mddev), in raid5_end_read_request()
2842 rdev->bdev); in raid5_end_read_request()
2843 } else if (atomic_read(&rdev->read_errors) in raid5_end_read_request()
2844 > conf->max_nr_stripes) { in raid5_end_read_request()
2845 if (!test_bit(Faulty, &rdev->flags)) { in raid5_end_read_request()
2847 mdname(conf->mddev), in raid5_end_read_request()
2848 atomic_read(&rdev->read_errors), in raid5_end_read_request()
2849 conf->max_nr_stripes); in raid5_end_read_request()
2851 mdname(conf->mddev), rdev->bdev); in raid5_end_read_request()
2855 if (set_bad && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2856 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2859 if (sh->qd_idx >= 0 && sh->pd_idx == i) in raid5_end_read_request()
2860 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2861 else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { in raid5_end_read_request()
2862 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2863 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2865 set_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2867 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2868 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2870 && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2872 rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0))) in raid5_end_read_request()
2873 md_error(conf->mddev, rdev); in raid5_end_read_request()
2876 rdev_dec_pending(rdev, conf->mddev); in raid5_end_read_request()
2878 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_read_request()
2879 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_read_request()
2885 struct stripe_head *sh = bi->bi_private; in raid5_end_write_request()
2886 struct r5conf *conf = sh->raid_conf; in raid5_end_write_request() local
2887 int disks = sh->disks, i; in raid5_end_write_request()
2894 if (bi == &sh->dev[i].req) { in raid5_end_write_request()
2895 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_write_request()
2898 if (bi == &sh->dev[i].rreq) { in raid5_end_write_request()
2899 rdev = rdev_pend_deref(conf->disks[i].replacement); in raid5_end_write_request()
2907 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_write_request()
2912 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_write_request()
2913 bi->bi_status); in raid5_end_write_request()
2920 if (bi->bi_status) in raid5_end_write_request()
2921 md_error(conf->mddev, rdev); in raid5_end_write_request()
2922 else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2923 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2925 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); in raid5_end_write_request()
2927 if (bi->bi_status) { in raid5_end_write_request()
2928 set_bit(STRIPE_DEGRADED, &sh->state); in raid5_end_write_request()
2929 set_bit(WriteErrorSeen, &rdev->flags); in raid5_end_write_request()
2930 set_bit(R5_WriteError, &sh->dev[i].flags); in raid5_end_write_request()
2931 if (!test_and_set_bit(WantReplacement, &rdev->flags)) in raid5_end_write_request()
2933 &rdev->mddev->recovery); in raid5_end_write_request()
2934 } else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2935 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2937 set_bit(R5_MadeGood, &sh->dev[i].flags); in raid5_end_write_request()
2938 if (test_bit(R5_ReadError, &sh->dev[i].flags)) in raid5_end_write_request()
2941 * a re-write. in raid5_end_write_request()
2943 set_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_write_request()
2946 rdev_dec_pending(rdev, conf->mddev); in raid5_end_write_request()
2948 if (sh->batch_head && bi->bi_status && !replacement) in raid5_end_write_request()
2949 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); in raid5_end_write_request()
2952 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) in raid5_end_write_request()
2953 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_write_request()
2954 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_write_request()
2956 if (sh->batch_head && sh != sh->batch_head) in raid5_end_write_request()
2957 raid5_release_stripe(sh->batch_head); in raid5_end_write_request()
2963 struct r5conf *conf = mddev->private; in raid5_error() local
2968 mdname(mddev), rdev->bdev); in raid5_error()
2970 spin_lock_irqsave(&conf->device_lock, flags); in raid5_error()
2971 set_bit(Faulty, &rdev->flags); in raid5_error()
2972 clear_bit(In_sync, &rdev->flags); in raid5_error()
2973 mddev->degraded = raid5_calc_degraded(conf); in raid5_error()
2975 if (has_failed(conf)) { in raid5_error()
2976 set_bit(MD_BROKEN, &conf->mddev->flags); in raid5_error()
2977 conf->recovery_disabled = mddev->recovery_disabled; in raid5_error()
2980 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_error()
2983 mdname(mddev), conf->raid_disks - mddev->degraded); in raid5_error()
2986 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_error()
2987 set_bit(MD_RECOVERY_INTR, &mddev->recovery); in raid5_error()
2989 set_bit(Blocked, &rdev->flags); in raid5_error()
2990 set_mask_bits(&mddev->sb_flags, 0, in raid5_error()
2999 sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, in raid5_compute_sector() argument
3009 int algorithm = previous ? conf->prev_algo in raid5_compute_sector()
3010 : conf->algorithm; in raid5_compute_sector()
3011 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_sector()
3012 : conf->chunk_sectors; in raid5_compute_sector()
3013 int raid_disks = previous ? conf->previous_raid_disks in raid5_compute_sector()
3014 : conf->raid_disks; in raid5_compute_sector()
3015 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_sector()
3034 pd_idx = qd_idx = -1; in raid5_compute_sector()
3035 switch(conf->level) { in raid5_compute_sector()
3042 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3052 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3074 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3076 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3085 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3092 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3118 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3132 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3134 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3144 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3145 qd_idx = (pd_idx + raid_disks - 1) % raid_disks; in raid5_compute_sector()
3152 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3155 qd_idx = raid_disks - 1; in raid5_compute_sector()
3159 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3162 qd_idx = raid_disks - 1; in raid5_compute_sector()
3166 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3167 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3168 qd_idx = raid_disks - 1; in raid5_compute_sector()
3172 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3173 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3174 qd_idx = raid_disks - 1; in raid5_compute_sector()
3180 qd_idx = raid_disks - 1; in raid5_compute_sector()
3190 sh->pd_idx = pd_idx; in raid5_compute_sector()
3191 sh->qd_idx = qd_idx; in raid5_compute_sector()
3192 sh->ddf_layout = ddf_layout; in raid5_compute_sector()
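/*
 * Userspace sketch (an assumed simplification, not the kernel function) of
 * the RAID-5 "left symmetric" case handled above: the parity device rotates
 * backwards by one per stripe, and the data slots continue immediately after
 * the parity slot.  Function and variable names here are illustrative only.
 */
#include <stdio.h>

static void left_symmetric_map(unsigned long long chunk, int raid_disks,
			       int *pd_idx, int *dd_idx)
{
	int data_disks = raid_disks - 1;
	unsigned long long stripe = chunk / data_disks;

	*dd_idx = chunk % data_disks;			   /* logical slot in the stripe */
	*pd_idx = data_disks - (int)(stripe % raid_disks); /* parity device for this stripe */
	*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;	   /* physical data device */
}

int main(void)
{
	int pd, dd;

	for (unsigned long long chunk = 0; chunk < 9; chunk++) {
		left_symmetric_map(chunk, 4, &pd, &dd);
		printf("chunk %llu: data on disk %d, parity on disk %d\n",
		       chunk, dd, pd);
	}
	return 0;
}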
3203 struct r5conf *conf = sh->raid_conf; in raid5_compute_blocknr() local
3204 int raid_disks = sh->disks; in raid5_compute_blocknr()
3205 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_blocknr()
3206 sector_t new_sector = sh->sector, check; in raid5_compute_blocknr()
3207 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_blocknr()
3208 : conf->chunk_sectors; in raid5_compute_blocknr()
3209 int algorithm = previous ? conf->prev_algo in raid5_compute_blocknr()
3210 : conf->algorithm; in raid5_compute_blocknr()
3221 if (i == sh->pd_idx) in raid5_compute_blocknr()
3223 switch(conf->level) { in raid5_compute_blocknr()
3229 if (i > sh->pd_idx) in raid5_compute_blocknr()
3230 i--; in raid5_compute_blocknr()
3234 if (i < sh->pd_idx) in raid5_compute_blocknr()
3236 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3239 i -= 1; in raid5_compute_blocknr()
3248 if (i == sh->qd_idx) in raid5_compute_blocknr()
3255 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3256 i--; /* Q D D D P */ in raid5_compute_blocknr()
3257 else if (i > sh->pd_idx) in raid5_compute_blocknr()
3258 i -= 2; /* D D P Q D */ in raid5_compute_blocknr()
3262 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3263 i--; /* Q D D D P */ in raid5_compute_blocknr()
3266 if (i < sh->pd_idx) in raid5_compute_blocknr()
3268 i -= (sh->pd_idx + 2); in raid5_compute_blocknr()
3272 i -= 2; in raid5_compute_blocknr()
3278 if (sh->pd_idx == 0) in raid5_compute_blocknr()
3279 i--; /* P D D D Q */ in raid5_compute_blocknr()
3282 if (i < sh->pd_idx) in raid5_compute_blocknr()
3284 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3289 if (i > sh->pd_idx) in raid5_compute_blocknr()
3290 i--; in raid5_compute_blocknr()
3294 if (i < sh->pd_idx) in raid5_compute_blocknr()
3296 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3299 i -= 1; in raid5_compute_blocknr()
3310 check = raid5_compute_sector(conf, r_sector, in raid5_compute_blocknr()
3312 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx in raid5_compute_blocknr()
3313 || sh2.qd_idx != sh->qd_idx) { in raid5_compute_blocknr()
3315 mdname(conf->mddev)); in raid5_compute_blocknr()
3328 * 1. degraded stripe has a non-overwrite to the missing dev, AND this
3331 * In this case, when reading data for the non-overwrite dev, it is
3339 * It is important to be able to flush all stripes in raid5-cache.
 3342	 * stripe, we need to reserve (conf->raid_disks + 1) pages per stripe
3344 * operation, we only need (conf->max_degraded + 1) pages per stripe.
3355 * based on data in stripe cache. The array is read-only to upper
3359 static inline bool delay_towrite(struct r5conf *conf, in delay_towrite() argument
3364 if (!test_bit(R5_OVERWRITE, &dev->flags) && in delay_towrite()
3365 !test_bit(R5_Insync, &dev->flags) && s->injournal) in delay_towrite()
3368 if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && in delay_towrite()
3369 s->injournal > 0) in delay_towrite()
3372 if (s->log_failed && s->injournal) in delay_towrite()
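/*
 * Worked numbers only (hypothetical array, illustrative helper name) for the
 * reservation rule described in the comment block above delay_towrite():
 * flushing a cached stripe together with its pending writes needs one page
 * per member device plus one, while flushing without the pending writes only
 * needs enough pages to regenerate the parity blocks.
 */
static inline int r5c_flush_pages_per_stripe(int raid_disks, int max_degraded,
					     int include_pending_writes)
{
	return include_pending_writes ? raid_disks + 1 : max_degraded + 1;
}

/* e.g. a 6-device RAID-6 (max_degraded == 2): 7 pages vs. 3 pages per stripe */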
3381 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks; in schedule_reconstruction()
3382 struct r5conf *conf = sh->raid_conf; in schedule_reconstruction() local
3383 int level = conf->level; in schedule_reconstruction()
3394 for (i = disks; i--; ) { in schedule_reconstruction()
3395 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3397 if (dev->towrite && !delay_towrite(conf, dev, s)) { in schedule_reconstruction()
3398 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3399 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3401 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3402 s->locked++; in schedule_reconstruction()
3403 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3404 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3405 s->locked++; in schedule_reconstruction()
3413 if (!s->locked) in schedule_reconstruction()
3416 sh->reconstruct_state = reconstruct_state_drain_run; in schedule_reconstruction()
3417 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3419 sh->reconstruct_state = reconstruct_state_run; in schedule_reconstruction()
3421 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3423 if (s->locked + conf->max_degraded == disks) in schedule_reconstruction()
3424 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) in schedule_reconstruction()
3425 atomic_inc(&conf->pending_full_writes); in schedule_reconstruction()
3427 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || in schedule_reconstruction()
3428 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); in schedule_reconstruction()
3430 (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) || in schedule_reconstruction()
3431 test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags)))); in schedule_reconstruction()
3433 for (i = disks; i--; ) { in schedule_reconstruction()
3434 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3438 if (dev->towrite && in schedule_reconstruction()
3439 (test_bit(R5_UPTODATE, &dev->flags) || in schedule_reconstruction()
3440 test_bit(R5_Wantcompute, &dev->flags))) { in schedule_reconstruction()
3441 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3442 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3443 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3444 s->locked++; in schedule_reconstruction()
3445 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3446 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3447 s->locked++; in schedule_reconstruction()
3450 if (!s->locked) in schedule_reconstruction()
3451 /* False alarm - nothing to do */ in schedule_reconstruction()
3453 sh->reconstruct_state = reconstruct_state_prexor_drain_run; in schedule_reconstruction()
3454 set_bit(STRIPE_OP_PREXOR, &s->ops_request); in schedule_reconstruction()
3455 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3456 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3462 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3463 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3464 s->locked++; in schedule_reconstruction()
3467 int qd_idx = sh->qd_idx; in schedule_reconstruction()
3468 struct r5dev *dev = &sh->dev[qd_idx]; in schedule_reconstruction()
3470 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3471 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3472 s->locked++; in schedule_reconstruction()
3475 if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && in schedule_reconstruction()
3476 test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && in schedule_reconstruction()
3477 !test_bit(STRIPE_FULL_WRITE, &sh->state) && in schedule_reconstruction()
3478 test_bit(R5_Insync, &sh->dev[pd_idx].flags)) in schedule_reconstruction()
3479 set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request); in schedule_reconstruction()
3482 __func__, (unsigned long long)sh->sector, in schedule_reconstruction()
3483 s->locked, s->ops_request); in schedule_reconstruction()
3489 struct r5conf *conf = sh->raid_conf; in stripe_bio_overlaps() local
3493 bi->bi_iter.bi_sector, sh->sector); in stripe_bio_overlaps()
3496 if (sh->batch_head) in stripe_bio_overlaps()
3500 bip = &sh->dev[dd_idx].towrite; in stripe_bio_overlaps()
3502 bip = &sh->dev[dd_idx].toread; in stripe_bio_overlaps()
3504 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) { in stripe_bio_overlaps()
3505 if (bio_end_sector(*bip) > bi->bi_iter.bi_sector) in stripe_bio_overlaps()
3507 bip = &(*bip)->bi_next; in stripe_bio_overlaps()
3510 if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) in stripe_bio_overlaps()
3513 if (forwrite && raid5_has_ppl(conf)) { in stripe_bio_overlaps()
3527 for (i = 0; i < sh->disks; i++) { in stripe_bio_overlaps()
3528 if (i != sh->pd_idx && in stripe_bio_overlaps()
3529 (i == dd_idx || sh->dev[i].towrite)) { in stripe_bio_overlaps()
3530 sector = sh->dev[i].sector; in stripe_bio_overlaps()
3539 if (first + conf->chunk_sectors * (count - 1) != last) in stripe_bio_overlaps()
3549 struct r5conf *conf = sh->raid_conf; in __add_stripe_bio() local
3554 bip = &sh->dev[dd_idx].towrite; in __add_stripe_bio()
3558 bip = &sh->dev[dd_idx].toread; in __add_stripe_bio()
3561 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) in __add_stripe_bio()
3562 bip = &(*bip)->bi_next; in __add_stripe_bio()
3565 clear_bit(STRIPE_BATCH_READY, &sh->state); in __add_stripe_bio()
3567 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); in __add_stripe_bio()
3569 bi->bi_next = *bip; in __add_stripe_bio()
3572 md_write_inc(conf->mddev, bi); in __add_stripe_bio()
3576 sector_t sector = sh->dev[dd_idx].sector; in __add_stripe_bio()
3577 for (bi=sh->dev[dd_idx].towrite; in __add_stripe_bio()
3578 sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) && in __add_stripe_bio()
3579 bi && bi->bi_iter.bi_sector <= sector; in __add_stripe_bio()
3580 bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) { in __add_stripe_bio()
3584 if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf)) in __add_stripe_bio()
3585 if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags)) in __add_stripe_bio()
3586 sh->overwrite_disks++; in __add_stripe_bio()
3590 (*bip)->bi_iter.bi_sector, sh->sector, dd_idx, in __add_stripe_bio()
3591 sh->dev[dd_idx].sector); in __add_stripe_bio()
3593 if (conf->mddev->bitmap && firstwrite) { in __add_stripe_bio()
3606 set_bit(STRIPE_BITMAP_PENDING, &sh->state); in __add_stripe_bio()
3607 spin_unlock_irq(&sh->stripe_lock); in __add_stripe_bio()
3608 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, in __add_stripe_bio()
3609 RAID5_STRIPE_SECTORS(conf), 0); in __add_stripe_bio()
3610 spin_lock_irq(&sh->stripe_lock); in __add_stripe_bio()
3611 clear_bit(STRIPE_BITMAP_PENDING, &sh->state); in __add_stripe_bio()
3612 if (!sh->batch_head) { in __add_stripe_bio()
3613 sh->bm_seq = conf->seq_flush+1; in __add_stripe_bio()
3614 set_bit(STRIPE_BIT_DELAY, &sh->state); in __add_stripe_bio()
3627 spin_lock_irq(&sh->stripe_lock); in add_stripe_bio()
3630 set_bit(R5_Overlap, &sh->dev[dd_idx].flags); in add_stripe_bio()
3631 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3636 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3640 static void end_reshape(struct r5conf *conf);
3642 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, in stripe_set_idx() argument
3646 previous ? conf->prev_chunk_sectors : conf->chunk_sectors; in stripe_set_idx()
3649 int disks = previous ? conf->previous_raid_disks : conf->raid_disks; in stripe_set_idx()
3651 raid5_compute_sector(conf, in stripe_set_idx()
3652 stripe * (disks - conf->max_degraded) in stripe_set_idx()
3659 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, in handle_failed_stripe() argument
3663 BUG_ON(sh->batch_head); in handle_failed_stripe()
3664 for (i = disks; i--; ) { in handle_failed_stripe()
3668 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in handle_failed_stripe()
3671 rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_stripe()
3672 if (rdev && test_bit(In_sync, &rdev->flags) && in handle_failed_stripe()
3673 !test_bit(Faulty, &rdev->flags)) in handle_failed_stripe()
3674 atomic_inc(&rdev->nr_pending); in handle_failed_stripe()
3681 sh->sector, in handle_failed_stripe()
3682 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_stripe()
3683 md_error(conf->mddev, rdev); in handle_failed_stripe()
3684 rdev_dec_pending(rdev, conf->mddev); in handle_failed_stripe()
3687 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3689 bi = sh->dev[i].towrite; in handle_failed_stripe()
3690 sh->dev[i].towrite = NULL; in handle_failed_stripe()
3691 sh->overwrite_disks = 0; in handle_failed_stripe()
3692 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3698 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3699 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3701 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3702 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3703 struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3705 md_write_end(conf->mddev); in handle_failed_stripe()
3710 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3711 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3714 bi = sh->dev[i].written; in handle_failed_stripe()
3715 sh->dev[i].written = NULL; in handle_failed_stripe()
3716 if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { in handle_failed_stripe()
3717 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_failed_stripe()
3718 sh->dev[i].page = sh->dev[i].orig_page; in handle_failed_stripe()
3722 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3723 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3724 struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3726 md_write_end(conf->mddev); in handle_failed_stripe()
3731 /* fail any reads if this device is non-operational and in handle_failed_stripe()
3734 if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && in handle_failed_stripe()
3735 s->failed > conf->max_degraded && in handle_failed_stripe()
3736 (!test_bit(R5_Insync, &sh->dev[i].flags) || in handle_failed_stripe()
3737 test_bit(R5_ReadError, &sh->dev[i].flags))) { in handle_failed_stripe()
3738 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3739 bi = sh->dev[i].toread; in handle_failed_stripe()
3740 sh->dev[i].toread = NULL; in handle_failed_stripe()
3741 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3742 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3743 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3745 s->to_read--; in handle_failed_stripe()
3746 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3747 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3749 r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3756 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3757 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3759 * still be locked - so just clear all R5_LOCKED flags in handle_failed_stripe()
3761 clear_bit(R5_LOCKED, &sh->dev[i].flags); in handle_failed_stripe()
3763 s->to_write = 0; in handle_failed_stripe()
3764 s->written = 0; in handle_failed_stripe()
3766 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_failed_stripe()
3767 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_failed_stripe()
3768 md_wakeup_thread(conf->mddev->thread); in handle_failed_stripe()
3772 handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, in handle_failed_sync() argument
3778 BUG_ON(sh->batch_head); in handle_failed_sync()
3779 clear_bit(STRIPE_SYNCING, &sh->state); in handle_failed_sync()
3780 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_failed_sync()
3781 wake_up(&conf->wait_for_overlap); in handle_failed_sync()
3782 s->syncing = 0; in handle_failed_sync()
3783 s->replacing = 0; in handle_failed_sync()
3789 * non-sync devices, or abort the recovery in handle_failed_sync()
3791 if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { in handle_failed_sync()
3796 for (i = 0; i < conf->raid_disks; i++) { in handle_failed_sync()
3797 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_sync()
3799 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3800 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3801 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3802 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3804 rdev = rcu_dereference(conf->disks[i].replacement); in handle_failed_sync()
3806 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3807 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3808 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3809 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3814 conf->recovery_disabled = in handle_failed_sync()
3815 conf->mddev->recovery_disabled; in handle_failed_sync()
3817 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort); in handle_failed_sync()
3826 rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement); in want_replace()
3828 && !test_bit(Faulty, &rdev->flags) in want_replace()
3829 && !test_bit(In_sync, &rdev->flags) in want_replace()
3830 && (rdev->recovery_offset <= sh->sector in want_replace()
3831 || rdev->mddev->recovery_cp <= sh->sector)) in want_replace()
3840 struct r5dev *dev = &sh->dev[disk_idx]; in need_this_block()
3841 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], in need_this_block()
3842 &sh->dev[s->failed_num[1]] }; in need_this_block()
3844 bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW); in need_this_block()
3847 if (test_bit(R5_LOCKED, &dev->flags) || in need_this_block()
3848 test_bit(R5_UPTODATE, &dev->flags)) in need_this_block()
3854 if (dev->toread || in need_this_block()
3855 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))) in need_this_block()
3859 if (s->syncing || s->expanding || in need_this_block()
3860 (s->replacing && want_replace(sh, disk_idx))) in need_this_block()
3866 if ((s->failed >= 1 && fdev[0]->toread) || in need_this_block()
3867 (s->failed >= 2 && fdev[1]->toread)) in need_this_block()
3873 /* Sometimes neither read-modify-write nor reconstruct-write in need_this_block()
3875 * can. Then the parity-update is certain to have enough to in need_this_block()
3881 if (!s->failed || !s->to_write) in need_this_block()
3884 if (test_bit(R5_Insync, &dev->flags) && in need_this_block()
3885 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in need_this_block()
 3886			/* Pre-reads are not permitted until after a short delay in need_this_block()
3893 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3894 if (fdev[i]->towrite && in need_this_block()
3895 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3896 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3904 if (s->failed >= 2 && in need_this_block()
3905 (fdev[i]->towrite || in need_this_block()
3906 s->failed_num[i] == sh->pd_idx || in need_this_block()
3907 s->failed_num[i] == sh->qd_idx) && in need_this_block()
3908 !test_bit(R5_UPTODATE, &fdev[i]->flags)) in need_this_block()
3911 * reconstruct-write. in need_this_block()
3916 /* If we are forced to do a reconstruct-write, because parity in need_this_block()
3924 sh->sector < sh->raid_conf->mddev->recovery_cp) in need_this_block()
3925 /* reconstruct-write isn't being forced */ in need_this_block()
3927 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3928 if (s->failed_num[i] != sh->pd_idx && in need_this_block()
3929 s->failed_num[i] != sh->qd_idx && in need_this_block()
3930 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3931 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3938 /* fetch_block - checks the given member device to see if its data needs
3947 struct r5dev *dev = &sh->dev[disk_idx]; in fetch_block()
3954 BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); in fetch_block()
3955 BUG_ON(test_bit(R5_Wantread, &dev->flags)); in fetch_block()
3956 BUG_ON(sh->batch_head); in fetch_block()
3959 * In the raid6 case if the only non-uptodate disk is P in fetch_block()
3961 * drives. It is safe to compute rather than re-read P. in fetch_block()
3967 if ((s->uptodate == disks - 1) && in fetch_block()
3968 ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) || in fetch_block()
3969 (s->failed && (disk_idx == s->failed_num[0] || in fetch_block()
3970 disk_idx == s->failed_num[1])))) { in fetch_block()
3975 (unsigned long long)sh->sector, disk_idx); in fetch_block()
3976 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
3977 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
3978 set_bit(R5_Wantcompute, &dev->flags); in fetch_block()
3979 sh->ops.target = disk_idx; in fetch_block()
3980 sh->ops.target2 = -1; /* no 2nd target */ in fetch_block()
3981 s->req_compute = 1; in fetch_block()
3988 s->uptodate++; in fetch_block()
3990 } else if (s->uptodate == disks-2 && s->failed >= 2) { in fetch_block()
3991 /* Computing 2-failure is *very* expensive; only in fetch_block()
3995 for (other = disks; other--; ) { in fetch_block()
3999 &sh->dev[other].flags)) in fetch_block()
4004 (unsigned long long)sh->sector, in fetch_block()
4006 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
4007 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
4008 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); in fetch_block()
4009 set_bit(R5_Wantcompute, &sh->dev[other].flags); in fetch_block()
4010 sh->ops.target = disk_idx; in fetch_block()
4011 sh->ops.target2 = other; in fetch_block()
4012 s->uptodate += 2; in fetch_block()
4013 s->req_compute = 1; in fetch_block()
4015 } else if (test_bit(R5_Insync, &dev->flags)) { in fetch_block()
4016 set_bit(R5_LOCKED, &dev->flags); in fetch_block()
4017 set_bit(R5_Wantread, &dev->flags); in fetch_block()
4018 s->locked++; in fetch_block()
4020 disk_idx, s->syncing); in fetch_block()
4028 * handle_stripe_fill - read or compute data to satisfy pending requests.
4040 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && in handle_stripe_fill()
4041 !sh->reconstruct_state) { in handle_stripe_fill()
4050 if (s->to_read && s->injournal && s->failed) { in handle_stripe_fill()
4051 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in handle_stripe_fill()
4056 for (i = disks; i--; ) in handle_stripe_fill()
4061 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_fill()
4071 static void handle_stripe_clean_event(struct r5conf *conf, in handle_stripe_clean_event() argument
4080 for (i = disks; i--; ) in handle_stripe_clean_event()
4081 if (sh->dev[i].written) { in handle_stripe_clean_event()
4082 dev = &sh->dev[i]; in handle_stripe_clean_event()
4083 if (!test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_clean_event()
4084 (test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_clean_event()
4085 test_bit(R5_Discard, &dev->flags) || in handle_stripe_clean_event()
4086 test_bit(R5_SkipCopy, &dev->flags))) { in handle_stripe_clean_event()
4090 if (test_and_clear_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
4091 clear_bit(R5_UPTODATE, &dev->flags); in handle_stripe_clean_event()
4092 if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { in handle_stripe_clean_event()
4093 WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); in handle_stripe_clean_event()
4098 dev->page = dev->orig_page; in handle_stripe_clean_event()
4099 wbi = dev->written; in handle_stripe_clean_event()
4100 dev->written = NULL; in handle_stripe_clean_event()
4101 while (wbi && wbi->bi_iter.bi_sector < in handle_stripe_clean_event()
4102 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in handle_stripe_clean_event()
4103 wbi2 = r5_next_bio(conf, wbi, dev->sector); in handle_stripe_clean_event()
4104 md_write_end(conf->mddev); in handle_stripe_clean_event()
4108 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_stripe_clean_event()
4109 RAID5_STRIPE_SECTORS(conf), in handle_stripe_clean_event()
4110 !test_bit(STRIPE_DEGRADED, &sh->state), in handle_stripe_clean_event()
4112 if (head_sh->batch_head) { in handle_stripe_clean_event()
4113 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4117 dev = &sh->dev[i]; in handle_stripe_clean_event()
4122 dev = &sh->dev[i]; in handle_stripe_clean_event()
4123 } else if (test_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
4130 test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_clean_event()
4132 clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4133 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4134 if (sh->qd_idx >= 0) { in handle_stripe_clean_event()
4135 clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4136 clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4139 clear_bit(STRIPE_DISCARD, &sh->state); in handle_stripe_clean_event()
4146 hash = sh->hash_lock_index; in handle_stripe_clean_event()
4147 spin_lock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4149 spin_unlock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4150 if (head_sh->batch_head) { in handle_stripe_clean_event()
4151 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4158 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) in handle_stripe_clean_event()
4159 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_clean_event()
4163 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_stripe_clean_event()
4164 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_stripe_clean_event()
4165 md_wakeup_thread(conf->mddev->thread); in handle_stripe_clean_event()
4167 if (head_sh->batch_head && do_endio) in handle_stripe_clean_event()
4173 * old data. This page is stored in dev->orig_page.
4181 return (test_bit(R5_UPTODATE, &dev->flags)) && in uptodate_for_rmw()
4182 (!test_bit(R5_InJournal, &dev->flags) || in uptodate_for_rmw()
4183 test_bit(R5_OrigPageUPTDODATE, &dev->flags)); in uptodate_for_rmw()
4186 static int handle_stripe_dirtying(struct r5conf *conf, in handle_stripe_dirtying() argument
4192 sector_t recovery_cp = conf->mddev->recovery_cp; in handle_stripe_dirtying()
4197 * In this case, we need to always do reconstruct-write, to ensure in handle_stripe_dirtying()
4198 * that in case of drive failure or read-error correction, we in handle_stripe_dirtying()
4201 if (conf->rmw_level == PARITY_DISABLE_RMW || in handle_stripe_dirtying()
4202 (recovery_cp < MaxSector && sh->sector >= recovery_cp && in handle_stripe_dirtying()
4203 s->failed == 0)) { in handle_stripe_dirtying()
4204 /* Calculate the real rcw later - for now make it in handle_stripe_dirtying()
4208 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", in handle_stripe_dirtying()
4209 conf->rmw_level, (unsigned long long)recovery_cp, in handle_stripe_dirtying()
4210 (unsigned long long)sh->sector); in handle_stripe_dirtying()
4211 } else for (i = disks; i--; ) { in handle_stripe_dirtying()
4213 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4214 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4215 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4216 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4217 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4219 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4220 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4226 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4227 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4228 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4229 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4230 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4231 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4239 (unsigned long long)sh->sector, sh->state, rmw, rcw); in handle_stripe_dirtying()
4240 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_dirtying()
4241 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { in handle_stripe_dirtying()
4242 /* prefer read-modify-write, but need to get some data */ in handle_stripe_dirtying()
4243 if (conf->mddev->queue) in handle_stripe_dirtying()
4244 blk_add_trace_msg(conf->mddev->queue, in handle_stripe_dirtying()
4246 (unsigned long long)sh->sector, rmw); in handle_stripe_dirtying()
4247 for (i = disks; i--; ) { in handle_stripe_dirtying()
4248 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4249 if (test_bit(R5_InJournal, &dev->flags) && in handle_stripe_dirtying()
4250 dev->page == dev->orig_page && in handle_stripe_dirtying()
4251 !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_dirtying()
4256 dev->orig_page = p; in handle_stripe_dirtying()
4262 * disk_info->extra_page in handle_stripe_dirtying()
4265 &conf->cache_state)) { in handle_stripe_dirtying()
4271 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4272 s->waiting_extra_page = 1; in handle_stripe_dirtying()
4273 return -EAGAIN; in handle_stripe_dirtying()
4277 for (i = disks; i--; ) { in handle_stripe_dirtying()
4278 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4279 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4280 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4281 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4282 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4284 test_bit(R5_Wantcompute, &dev->flags)) && in handle_stripe_dirtying()
4285 test_bit(R5_Insync, &dev->flags)) { in handle_stripe_dirtying()
4287 &sh->state)) { in handle_stripe_dirtying()
4288 pr_debug("Read_old block %d for r-m-w\n", in handle_stripe_dirtying()
4290 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4291 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4292 s->locked++; in handle_stripe_dirtying()
4294 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4298 if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) { in handle_stripe_dirtying()
4302 for (i = disks; i--; ) { in handle_stripe_dirtying()
4303 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4304 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4305 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4306 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4307 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4308 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4310 if (test_bit(R5_Insync, &dev->flags) && in handle_stripe_dirtying()
4312 &sh->state)) { in handle_stripe_dirtying()
4315 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4316 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4317 s->locked++; in handle_stripe_dirtying()
4320 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4323 if (rcw && conf->mddev->queue) in handle_stripe_dirtying()
4324 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", in handle_stripe_dirtying()
4325 (unsigned long long)sh->sector, in handle_stripe_dirtying()
4326 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); in handle_stripe_dirtying()
4330 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe_dirtying()
4331 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4343 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && in handle_stripe_dirtying()
4344 (s->locked == 0 && (rcw == 0 || rmw == 0) && in handle_stripe_dirtying()
4345 !test_bit(STRIPE_BIT_DELAY, &sh->state))) in handle_stripe_dirtying()
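/*
 * Back-of-the-envelope model (not the driver's exact accounting, which also
 * honours the journal and per-device state bits) of the rmw/rcw counts
 * compared in handle_stripe_dirtying() above: for a RAID-5 stripe with
 * data_disks data blocks of which 'w' are being overwritten,
 * read-modify-write must pre-read the old contents of those blocks plus the
 * old parity, while reconstruct-write must pre-read the untouched blocks.
 */
static inline int rmw_prereads(int w)
{
	return w + 1;			/* old data of the w blocks + old parity */
}

static inline int rcw_prereads(int data_disks, int w)
{
	return data_disks - w;		/* data blocks not being overwritten */
}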
4350 static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks5() argument
4355 BUG_ON(sh->batch_head); in handle_parity_checks5()
4356 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks5()
4358 switch (sh->check_state) { in handle_parity_checks5()
4361 if (s->failed == 0) { in handle_parity_checks5()
4362 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4363 sh->check_state = check_state_run; in handle_parity_checks5()
4364 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks5()
4365 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4366 s->uptodate--; in handle_parity_checks5()
4369 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks5()
4372 sh->check_state = check_state_idle; in handle_parity_checks5()
4374 dev = &sh->dev[sh->pd_idx]; in handle_parity_checks5()
4377 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks5()
4381 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); in handle_parity_checks5()
4382 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4384 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks5()
4385 s->locked++; in handle_parity_checks5()
4386 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks5()
4388 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks5()
4389 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4394 sh->check_state = check_state_idle; in handle_parity_checks5()
4399 if (s->failed) in handle_parity_checks5()
4406 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) in handle_parity_checks5()
4410 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4412 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks5()
4413 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks5()
4415 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4417 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks5()
4418 (unsigned long long) sh->sector, in handle_parity_checks5()
4419 (unsigned long long) sh->sector + in handle_parity_checks5()
4420 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks5()
4422 sh->check_state = check_state_compute_run; in handle_parity_checks5()
4423 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks5()
4424 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks5()
4426 &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4427 sh->ops.target = sh->pd_idx; in handle_parity_checks5()
4428 sh->ops.target2 = -1; in handle_parity_checks5()
4429 s->uptodate++; in handle_parity_checks5()
4437 __func__, sh->check_state, in handle_parity_checks5()
4438 (unsigned long long) sh->sector); in handle_parity_checks5()
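/*
 * Reading aid only, paraphrasing the switch above: what a RAID-5 parity
 * check does with its zero-sum result.
 *   - result == 0: parity matches the data, mark the stripe in sync;
 *   - result != 0 during a "check" pass (MD_RECOVERY_CHECK): count it in
 *     resync_mismatches, warn, and leave the stripe alone;
 *   - result != 0 otherwise ("repair"): recompute the parity block from the
 *     data blocks and write it back.
 */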
4443 static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks6() argument
4447 int pd_idx = sh->pd_idx; in handle_parity_checks6()
4448 int qd_idx = sh->qd_idx; in handle_parity_checks6()
4451 BUG_ON(sh->batch_head); in handle_parity_checks6()
4452 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks6()
4454 BUG_ON(s->failed > 2); in handle_parity_checks6()
4462 switch (sh->check_state) { in handle_parity_checks6()
4465 if (s->failed == s->q_failed) { in handle_parity_checks6()
4470 sh->check_state = check_state_run; in handle_parity_checks6()
4472 if (!s->q_failed && s->failed < 2) { in handle_parity_checks6()
4476 if (sh->check_state == check_state_run) in handle_parity_checks6()
4477 sh->check_state = check_state_run_pq; in handle_parity_checks6()
4479 sh->check_state = check_state_run_q; in handle_parity_checks6()
4483 sh->ops.zero_sum_result = 0; in handle_parity_checks6()
4485 if (sh->check_state == check_state_run) { in handle_parity_checks6()
4487 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in handle_parity_checks6()
4488 s->uptodate--; in handle_parity_checks6()
4490 if (sh->check_state >= check_state_run && in handle_parity_checks6()
4491 sh->check_state <= check_state_run_pq) { in handle_parity_checks6()
4495 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks6()
4499 /* we have 2-disk failure */ in handle_parity_checks6()
4500 BUG_ON(s->failed != 2); in handle_parity_checks6()
4503 sh->check_state = check_state_idle; in handle_parity_checks6()
4506 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks6()
4513 if (s->failed == 2) { in handle_parity_checks6()
4514 dev = &sh->dev[s->failed_num[1]]; in handle_parity_checks6()
4515 s->locked++; in handle_parity_checks6()
4516 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4517 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4519 if (s->failed >= 1) { in handle_parity_checks6()
4520 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks6()
4521 s->locked++; in handle_parity_checks6()
4522 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4523 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4525 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4526 dev = &sh->dev[pd_idx]; in handle_parity_checks6()
4527 s->locked++; in handle_parity_checks6()
4528 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4529 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4531 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4532 dev = &sh->dev[qd_idx]; in handle_parity_checks6()
4533 s->locked++; in handle_parity_checks6()
4534 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4535 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4537 if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), in handle_parity_checks6()
4539 mdname(conf->mddev), in handle_parity_checks6()
4540 dev - (struct r5dev *) &sh->dev)) { in handle_parity_checks6()
4541 clear_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4542 clear_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4543 s->locked--; in handle_parity_checks6()
4545 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks6()
4547 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4554 sh->check_state = check_state_idle; in handle_parity_checks6()
4560 if (sh->ops.zero_sum_result == 0) { in handle_parity_checks6()
4562 if (!s->failed) in handle_parity_checks6()
4563 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4569 sh->check_state = check_state_compute_result; in handle_parity_checks6()
4577 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks6()
4578 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks6()
4580 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4582 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks6()
4583 (unsigned long long) sh->sector, in handle_parity_checks6()
4584 (unsigned long long) sh->sector + in handle_parity_checks6()
4585 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks6()
4587 int *target = &sh->ops.target; in handle_parity_checks6()
4589 sh->ops.target = -1; in handle_parity_checks6()
4590 sh->ops.target2 = -1; in handle_parity_checks6()
4591 sh->check_state = check_state_compute_run; in handle_parity_checks6()
4592 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks6()
4593 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks6()
4594 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4596 &sh->dev[pd_idx].flags); in handle_parity_checks6()
4598 target = &sh->ops.target2; in handle_parity_checks6()
4599 s->uptodate++; in handle_parity_checks6()
4601 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4603 &sh->dev[qd_idx].flags); in handle_parity_checks6()
4605 s->uptodate++; in handle_parity_checks6()
4614 __func__, sh->check_state, in handle_parity_checks6()
4615 (unsigned long long) sh->sector); in handle_parity_checks6()
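/*
 * Editor's note: handle_parity_checks6() drives the RAID-6 check/repair state
 * machine.  Once the async zero-sum check finishes, the two result bits in
 * sh->ops.zero_sum_result say which parity block (P, Q, or both) failed
 * verification; the code above then either rewrites the bad parity (repair)
 * or only counts a mismatch (check).  Below is a minimal stand-alone sketch
 * of that decision; the flag values are assumed here purely so the example
 * compiles on its own and are not taken from the kernel headers.
 */
#include <stdio.h>

#define P_RESULT (1u << 0)	/* assumed stand-in for SUM_CHECK_P_RESULT */
#define Q_RESULT (1u << 1)	/* assumed stand-in for SUM_CHECK_Q_RESULT */

/* Return a human-readable summary of what a RAID-6 parity check found. */
static const char *parity_check_outcome(unsigned int zero_sum_result)
{
	if (zero_sum_result == 0)
		return "P and Q both verified - stripe is in sync";
	if ((zero_sum_result & P_RESULT) && (zero_sum_result & Q_RESULT))
		return "P and Q both wrong - recompute and rewrite both";
	if (zero_sum_result & P_RESULT)
		return "P wrong - recompute and rewrite P only";
	return "Q wrong - recompute and rewrite Q only";
}

int main(void)
{
	printf("%s\n", parity_check_outcome(P_RESULT));
	return 0;
}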
4620 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) in handle_stripe_expansion() argument
4627 struct dma_async_tx_descriptor *tx = NULL; in handle_stripe_expansion() local
4628 BUG_ON(sh->batch_head); in handle_stripe_expansion()
4629 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); in handle_stripe_expansion()
4630 for (i = 0; i < sh->disks; i++) in handle_stripe_expansion()
4631 if (i != sh->pd_idx && i != sh->qd_idx) { in handle_stripe_expansion()
4637 sector_t s = raid5_compute_sector(conf, bn, 0, in handle_stripe_expansion()
4639 sh2 = raid5_get_active_stripe(conf, NULL, s, in handle_stripe_expansion()
4647 if (!test_bit(STRIPE_EXPANDING, &sh2->state) || in handle_stripe_expansion()
4648 test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { in handle_stripe_expansion()
4655 init_async_submit(&submit, 0, tx, NULL, NULL, NULL); in handle_stripe_expansion()
4656 tx = async_memcpy(sh2->dev[dd_idx].page, in handle_stripe_expansion()
4657 sh->dev[i].page, sh2->dev[dd_idx].offset, in handle_stripe_expansion()
4658 sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), in handle_stripe_expansion()
4661 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4662 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4663 for (j = 0; j < conf->raid_disks; j++) in handle_stripe_expansion()
4664 if (j != sh2->pd_idx && in handle_stripe_expansion()
4665 j != sh2->qd_idx && in handle_stripe_expansion()
4666 !test_bit(R5_Expanded, &sh2->dev[j].flags)) in handle_stripe_expansion()
4668 if (j == conf->raid_disks) { in handle_stripe_expansion()
4669 set_bit(STRIPE_EXPAND_READY, &sh2->state); in handle_stripe_expansion()
4670 set_bit(STRIPE_HANDLE, &sh2->state); in handle_stripe_expansion()
4676 async_tx_quiesce(&tx); in handle_stripe_expansion()
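/*
 * Editor's note: handle_stripe_expansion() runs on an expand-source stripe
 * during a reshape.  For every data block (P and Q are skipped) it computes
 * where that block lives under the new geometry, grabs the destination
 * stripe, and copies the page across with async_memcpy(), chaining the DMA
 * descriptors through 'tx' and waiting for the whole chain with
 * async_tx_quiesce().  Once a destination stripe has received all of its
 * data blocks it is marked STRIPE_EXPAND_READY and queued for handling.
 */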
4680 * handle_stripe - examine a stripe's state and decide what work it needs next.
4695 struct r5conf *conf = sh->raid_conf; in analyse_stripe() local
4696 int disks = sh->disks; in analyse_stripe()
4703 s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head; in analyse_stripe()
4704 s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head; in analyse_stripe()
4705 s->failed_num[0] = -1; in analyse_stripe()
4706 s->failed_num[1] = -1; in analyse_stripe()
4707 s->log_failed = r5l_log_disk_error(conf); in analyse_stripe()
4711 for (i=disks; i--; ) { in analyse_stripe()
4717 dev = &sh->dev[i]; in analyse_stripe()
4720 i, dev->flags, in analyse_stripe()
4721 dev->toread, dev->towrite, dev->written); in analyse_stripe()
4727 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && in analyse_stripe()
4728 !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) in analyse_stripe()
4729 set_bit(R5_Wantfill, &dev->flags); in analyse_stripe()
4732 if (test_bit(R5_LOCKED, &dev->flags)) in analyse_stripe()
4733 s->locked++; in analyse_stripe()
4734 if (test_bit(R5_UPTODATE, &dev->flags)) in analyse_stripe()
4735 s->uptodate++; in analyse_stripe()
4736 if (test_bit(R5_Wantcompute, &dev->flags)) { in analyse_stripe()
4737 s->compute++; in analyse_stripe()
4738 BUG_ON(s->compute > 2); in analyse_stripe()
4741 if (test_bit(R5_Wantfill, &dev->flags)) in analyse_stripe()
4742 s->to_fill++; in analyse_stripe()
4743 else if (dev->toread) in analyse_stripe()
4744 s->to_read++; in analyse_stripe()
4745 if (dev->towrite) { in analyse_stripe()
4746 s->to_write++; in analyse_stripe()
4747 if (!test_bit(R5_OVERWRITE, &dev->flags)) in analyse_stripe()
4748 s->non_overwrite++; in analyse_stripe()
4750 if (dev->written) in analyse_stripe()
4751 s->written++; in analyse_stripe()
4755 rdev = rcu_dereference(conf->disks[i].replacement); in analyse_stripe()
4756 if (rdev && !test_bit(Faulty, &rdev->flags) && in analyse_stripe()
4757 rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && in analyse_stripe()
4758 !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4760 set_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4762 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4763 set_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4765 clear_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4766 rdev = rcu_dereference(conf->disks[i].rdev); in analyse_stripe()
4767 clear_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4769 if (rdev && test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4772 is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4774 if (s->blocked_rdev == NULL in analyse_stripe()
4775 && (test_bit(Blocked, &rdev->flags) in analyse_stripe()
4779 &rdev->flags); in analyse_stripe()
4780 s->blocked_rdev = rdev; in analyse_stripe()
4781 atomic_inc(&rdev->nr_pending); in analyse_stripe()
4784 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4786 /* Not in-sync */; in analyse_stripe()
4788 /* also not in-sync */ in analyse_stripe()
4789 if (!test_bit(WriteErrorSeen, &rdev->flags) && in analyse_stripe()
4790 test_bit(R5_UPTODATE, &dev->flags)) { in analyse_stripe()
4791 /* treat as in-sync, but with a read error in analyse_stripe()
4794 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4795 set_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4797 } else if (test_bit(In_sync, &rdev->flags)) in analyse_stripe()
4798 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4799 else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) in analyse_stripe()
4801 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4802 else if (test_bit(R5_UPTODATE, &dev->flags) && in analyse_stripe()
4803 test_bit(R5_Expanded, &dev->flags)) in analyse_stripe()
4808 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4810 if (test_bit(R5_WriteError, &dev->flags)) { in analyse_stripe()
4814 conf->disks[i].rdev); in analyse_stripe()
4816 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4817 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4818 s->handle_bad_blocks = 1; in analyse_stripe()
4819 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4821 clear_bit(R5_WriteError, &dev->flags); in analyse_stripe()
4823 if (test_bit(R5_MadeGood, &dev->flags)) { in analyse_stripe()
4827 conf->disks[i].rdev); in analyse_stripe()
4828 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4829 s->handle_bad_blocks = 1; in analyse_stripe()
4830 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4832 clear_bit(R5_MadeGood, &dev->flags); in analyse_stripe()
4834 if (test_bit(R5_MadeGoodRepl, &dev->flags)) { in analyse_stripe()
4836 conf->disks[i].replacement); in analyse_stripe()
4837 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4838 s->handle_bad_blocks = 1; in analyse_stripe()
4839 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4841 clear_bit(R5_MadeGoodRepl, &dev->flags); in analyse_stripe()
4843 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4845 clear_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4846 clear_bit(R5_ReWrite, &dev->flags); in analyse_stripe()
4848 if (test_bit(R5_ReadError, &dev->flags)) in analyse_stripe()
4849 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4850 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4851 if (s->failed < 2) in analyse_stripe()
4852 s->failed_num[s->failed] = i; in analyse_stripe()
4853 s->failed++; in analyse_stripe()
4854 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4858 conf->disks[i].replacement); in analyse_stripe()
4859 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4864 if (test_bit(R5_InJournal, &dev->flags)) in analyse_stripe()
4865 s->injournal++; in analyse_stripe()
4866 if (test_bit(R5_InJournal, &dev->flags) && dev->written) in analyse_stripe()
4867 s->just_cached++; in analyse_stripe()
4869 if (test_bit(STRIPE_SYNCING, &sh->state)) { in analyse_stripe()
4879 sh->sector >= conf->mddev->recovery_cp || in analyse_stripe()
4880 test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) in analyse_stripe()
4881 s->syncing = 1; in analyse_stripe()
4883 s->replacing = 1; in analyse_stripe()
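/*
 * Editor's note: analyse_stripe() walks every device in the stripe once and
 * fills in the per-stripe scoreboard that handle_stripe() acts on: counts of
 * locked, uptodate, to-be-computed, to-be-read and to-be-written blocks, the
 * indices of up to two failed devices, journal state, and whether a blocked
 * or bad-block rdev means the stripe cannot be processed yet.  It also
 * records whether the stripe is currently syncing or replacing a device.
 */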
4895 if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) in clear_batch_ready()
4896 return (sh->batch_head && sh->batch_head != sh); in clear_batch_ready()
4897 spin_lock(&sh->stripe_lock); in clear_batch_ready()
4898 if (!sh->batch_head) { in clear_batch_ready()
4899 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4907 if (sh->batch_head != sh) { in clear_batch_ready()
4908 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4911 spin_lock(&sh->batch_lock); in clear_batch_ready()
4912 list_for_each_entry(tmp, &sh->batch_list, batch_list) in clear_batch_ready()
4913 clear_bit(STRIPE_BATCH_READY, &tmp->state); in clear_batch_ready()
4914 spin_unlock(&sh->batch_lock); in clear_batch_ready()
4915 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4931 list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { in break_stripe_batch_list()
4933 list_del_init(&sh->batch_list); in break_stripe_batch_list()
4935 WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | in break_stripe_batch_list()
4947 "stripe state: %lx\n", sh->state); in break_stripe_batch_list()
4948 WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | in break_stripe_batch_list()
4950 "head stripe state: %lx\n", head_sh->state); in break_stripe_batch_list()
4952 set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | in break_stripe_batch_list()
4956 head_sh->state & (1 << STRIPE_INSYNC)); in break_stripe_batch_list()
4958 sh->check_state = head_sh->check_state; in break_stripe_batch_list()
4959 sh->reconstruct_state = head_sh->reconstruct_state; in break_stripe_batch_list()
4960 spin_lock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4961 sh->batch_head = NULL; in break_stripe_batch_list()
4962 spin_unlock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4963 for (i = 0; i < sh->disks; i++) { in break_stripe_batch_list()
4964 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in break_stripe_batch_list()
4966 sh->dev[i].flags = head_sh->dev[i].flags & in break_stripe_batch_list()
4970 sh->state & handle_flags) in break_stripe_batch_list()
4971 set_bit(STRIPE_HANDLE, &sh->state); in break_stripe_batch_list()
4974 spin_lock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4975 head_sh->batch_head = NULL; in break_stripe_batch_list()
4976 spin_unlock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4977 for (i = 0; i < head_sh->disks; i++) in break_stripe_batch_list()
4978 if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) in break_stripe_batch_list()
4980 if (head_sh->state & handle_flags) in break_stripe_batch_list()
4981 set_bit(STRIPE_HANDLE, &head_sh->state); in break_stripe_batch_list()
4984 wake_up(&head_sh->raid_conf->wait_for_overlap); in break_stripe_batch_list()
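/*
 * Editor's note: break_stripe_batch_list() dissolves a write batch: each
 * member stripe is unlinked from the list, inherits the relevant state bits
 * and per-device flags from the batch head, has its batch_head pointer
 * cleared under stripe_lock, and is re-queued for handling where needed.
 * Anyone waiting on R5_Overlap for these stripes is woken at the end.
 */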
4990 struct r5conf *conf = sh->raid_conf; in handle_stripe() local
4993 int disks = sh->disks; in handle_stripe()
4996 clear_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5007 if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { in handle_stripe()
5010 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5014 if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) in handle_stripe()
5017 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { in handle_stripe()
5018 spin_lock(&sh->stripe_lock); in handle_stripe()
5023 if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && in handle_stripe()
5024 !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && in handle_stripe()
5025 !test_bit(STRIPE_DISCARD, &sh->state) && in handle_stripe()
5026 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { in handle_stripe()
5027 set_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
5028 clear_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5029 clear_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
5031 spin_unlock(&sh->stripe_lock); in handle_stripe()
5033 clear_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
5037 (unsigned long long)sh->sector, sh->state, in handle_stripe()
5038 atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, in handle_stripe()
5039 sh->check_state, sh->reconstruct_state); in handle_stripe()
5043 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) in handle_stripe()
5047 test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { in handle_stripe()
5048 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5055 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5059 rdev_dec_pending(s.blocked_rdev, conf->mddev); in handle_stripe()
5063 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { in handle_stripe()
5065 set_bit(STRIPE_BIOFILL_RUN, &sh->state); in handle_stripe()
5079 if (s.failed > conf->max_degraded || in handle_stripe()
5081 sh->check_state = 0; in handle_stripe()
5082 sh->reconstruct_state = 0; in handle_stripe()
5085 handle_failed_stripe(conf, sh, &s, disks); in handle_stripe()
5087 handle_failed_sync(conf, sh, &s); in handle_stripe()
5094 if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) in handle_stripe()
5096 if (sh->reconstruct_state == reconstruct_state_drain_result || in handle_stripe()
5097 sh->reconstruct_state == reconstruct_state_prexor_drain_result) { in handle_stripe()
5098 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
5103 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) && in handle_stripe()
5104 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)); in handle_stripe()
5105 BUG_ON(sh->qd_idx >= 0 && in handle_stripe()
5106 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) && in handle_stripe()
5107 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags)); in handle_stripe()
5108 for (i = disks; i--; ) { in handle_stripe()
5109 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
5110 if (test_bit(R5_LOCKED, &dev->flags) && in handle_stripe()
5111 (i == sh->pd_idx || i == sh->qd_idx || in handle_stripe()
5112 dev->written || test_bit(R5_InJournal, in handle_stripe()
5113 &dev->flags))) { in handle_stripe()
5115 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5120 if (!test_bit(R5_Insync, &dev->flags) || in handle_stripe()
5121 ((i == sh->pd_idx || i == sh->qd_idx) && in handle_stripe()
5123 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5126 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe()
5134 pdev = &sh->dev[sh->pd_idx]; in handle_stripe()
5135 s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) in handle_stripe()
5136 || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); in handle_stripe()
5137 qdev = &sh->dev[sh->qd_idx]; in handle_stripe()
5138 s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) in handle_stripe()
5139 || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) in handle_stripe()
5140 || conf->level < 6; in handle_stripe()
5143 (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) in handle_stripe()
5144 && !test_bit(R5_LOCKED, &pdev->flags) in handle_stripe()
5145 && (test_bit(R5_UPTODATE, &pdev->flags) || in handle_stripe()
5146 test_bit(R5_Discard, &pdev->flags))))) && in handle_stripe()
5147 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) in handle_stripe()
5148 && !test_bit(R5_LOCKED, &qdev->flags) in handle_stripe()
5149 && (test_bit(R5_UPTODATE, &qdev->flags) || in handle_stripe()
5150 test_bit(R5_Discard, &qdev->flags)))))) in handle_stripe()
5151 handle_stripe_clean_event(conf, sh, disks); in handle_stripe()
5154 r5c_handle_cached_data_endio(conf, sh, disks); in handle_stripe()
5173 r5c_finish_stripe_write_out(conf, sh, &s); in handle_stripe()
5184 if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) { in handle_stripe()
5185 if (!r5c_is_writeback(conf->log)) { in handle_stripe()
5187 handle_stripe_dirtying(conf, sh, &s, disks); in handle_stripe()
5193 ret = r5c_try_caching_write(conf, sh, &s, in handle_stripe()
5196 * If caching phase failed: ret == -EAGAIN in handle_stripe()
5202 if (ret == -EAGAIN || in handle_stripe()
5204 (!test_bit(STRIPE_R5C_CACHING, &sh->state) && in handle_stripe()
5206 ret = handle_stripe_dirtying(conf, sh, &s, in handle_stripe()
5208 if (ret == -EAGAIN) in handle_stripe()
5219 if (sh->check_state || in handle_stripe()
5221 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5222 !test_bit(STRIPE_INSYNC, &sh->state))) { in handle_stripe()
5223 if (conf->level == 6) in handle_stripe()
5224 handle_parity_checks6(conf, sh, &s, disks); in handle_stripe()
5226 handle_parity_checks5(conf, sh, &s, disks); in handle_stripe()
5230 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) in handle_stripe()
5231 && !test_bit(STRIPE_REPLACED, &sh->state)) { in handle_stripe()
5233 for (i = 0; i < conf->raid_disks; i++) in handle_stripe()
5234 if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { in handle_stripe()
5235 WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_stripe()
5236 set_bit(R5_WantReplace, &sh->dev[i].flags); in handle_stripe()
5237 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5241 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5242 set_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
5245 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5246 test_bit(STRIPE_INSYNC, &sh->state)) { in handle_stripe()
5247 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5248 clear_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
5249 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_stripe()
5250 wake_up(&conf->wait_for_overlap); in handle_stripe()
5256 if (s.failed <= conf->max_degraded && !conf->mddev->ro) in handle_stripe()
5258 struct r5dev *dev = &sh->dev[s.failed_num[i]]; in handle_stripe()
5259 if (test_bit(R5_ReadError, &dev->flags) in handle_stripe()
5260 && !test_bit(R5_LOCKED, &dev->flags) in handle_stripe()
5261 && test_bit(R5_UPTODATE, &dev->flags) in handle_stripe()
5263 if (!test_bit(R5_ReWrite, &dev->flags)) { in handle_stripe()
5264 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5265 set_bit(R5_ReWrite, &dev->flags); in handle_stripe()
5268 set_bit(R5_Wantread, &dev->flags); in handle_stripe()
5269 set_bit(R5_LOCKED, &dev->flags); in handle_stripe()
5275 if (sh->reconstruct_state == reconstruct_state_result) { in handle_stripe()
5277 = raid5_get_active_stripe(conf, NULL, sh->sector, in handle_stripe()
5280 if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { in handle_stripe()
5284 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
5285 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5287 &sh_src->state)) in handle_stripe()
5288 atomic_inc(&conf->preread_active_stripes); in handle_stripe()
5295 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
5296 clear_bit(STRIPE_EXPANDING, &sh->state); in handle_stripe()
5297 for (i = conf->raid_disks; i--; ) { in handle_stripe()
5298 set_bit(R5_Wantwrite, &sh->dev[i].flags); in handle_stripe()
5299 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5304 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && in handle_stripe()
5305 !sh->reconstruct_state) { in handle_stripe()
5307 sh->disks = conf->raid_disks; in handle_stripe()
5308 stripe_set_idx(sh->sector, conf, 0, sh); in handle_stripe()
5310 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { in handle_stripe()
5311 clear_bit(STRIPE_EXPAND_READY, &sh->state); in handle_stripe()
5312 atomic_dec(&conf->reshape_stripes); in handle_stripe()
5313 wake_up(&conf->wait_for_overlap); in handle_stripe()
5314 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5318 !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) in handle_stripe()
5319 handle_stripe_expansion(conf, sh); in handle_stripe()
5324 if (conf->mddev->external) in handle_stripe()
5326 conf->mddev); in handle_stripe()
5333 conf->mddev); in handle_stripe()
5337 for (i = disks; i--; ) { in handle_stripe()
5339 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
5340 if (test_and_clear_bit(R5_WriteError, &dev->flags)) { in handle_stripe()
5342 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5343 if (!rdev_set_badblocks(rdev, sh->sector, in handle_stripe()
5344 RAID5_STRIPE_SECTORS(conf), 0)) in handle_stripe()
5345 md_error(conf->mddev, rdev); in handle_stripe()
5346 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5348 if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { in handle_stripe()
5349 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5350 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5351 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5352 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5354 if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { in handle_stripe()
5355 rdev = rdev_pend_deref(conf->disks[i].replacement); in handle_stripe()
5358 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5359 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5360 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5361 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5375 atomic_dec(&conf->preread_active_stripes); in handle_stripe()
5376 if (atomic_read(&conf->preread_active_stripes) < in handle_stripe()
5378 md_wakeup_thread(conf->mddev->thread); in handle_stripe()
5381 clear_bit_unlock(STRIPE_ACTIVE, &sh->state); in handle_stripe()
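/*
 * Editor's note: the body of handle_stripe() above is the central per-stripe
 * state machine.  In order it: locks the stripe via STRIPE_ACTIVE, analyses
 * it, deals with failed devices and failed syncs, finishes any completed
 * reconstruct/parity write, chooses between read-modify-write,
 * reconstruct-write or write-back caching for dirty blocks, runs parity
 * checks for resync/check, handles replacement and reshape bookkeeping, and
 * finally records bad blocks and drops the rdev references it took.
 */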
5384 static void raid5_activate_delayed(struct r5conf *conf) in raid5_activate_delayed() argument
5385 __must_hold(&conf->device_lock) in raid5_activate_delayed()
5387 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { in raid5_activate_delayed()
5388 while (!list_empty(&conf->delayed_list)) { in raid5_activate_delayed()
5389 struct list_head *l = conf->delayed_list.next; in raid5_activate_delayed()
5393 clear_bit(STRIPE_DELAYED, &sh->state); in raid5_activate_delayed()
5394 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in raid5_activate_delayed()
5395 atomic_inc(&conf->preread_active_stripes); in raid5_activate_delayed()
5396 list_add_tail(&sh->lru, &conf->hold_list); in raid5_activate_delayed()
5402 static void activate_bit_delay(struct r5conf *conf, in activate_bit_delay() argument
5404 __must_hold(&conf->device_lock) in activate_bit_delay()
5407 list_add(&head, &conf->bitmap_list); in activate_bit_delay()
5408 list_del_init(&conf->bitmap_list); in activate_bit_delay()
5412 list_del_init(&sh->lru); in activate_bit_delay()
5413 atomic_inc(&sh->count); in activate_bit_delay()
5414 hash = sh->hash_lock_index; in activate_bit_delay()
5415 __release_stripe(conf, sh, &temp_inactive_list[hash]); in activate_bit_delay()
5421 struct r5conf *conf = mddev->private; in in_chunk_boundary() local
5422 sector_t sector = bio->bi_iter.bi_sector; in in_chunk_boundary()
5426 chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); in in_chunk_boundary()
5428 ((sector & (chunk_sectors - 1)) + bio_sectors); in in_chunk_boundary()
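/*
 * Editor's note: in_chunk_boundary() asks whether a read fits entirely
 * inside one chunk: the bio's offset within its chunk plus its length must
 * not exceed chunk_sectors (chunk_sectors is a power of two, so the offset
 * is just the low bits of the sector).  A stand-alone sketch of the same
 * arithmetic, with made-up parameter names:
 */
#include <stdbool.h>
#include <stdint.h>

/* True if [sector, sector + nr_sectors) lies within a single chunk. */
static bool fits_in_one_chunk(uint64_t sector, uint32_t nr_sectors,
			      uint32_t chunk_sectors /* power of two */)
{
	uint32_t offset_in_chunk = sector & (chunk_sectors - 1);

	return offset_in_chunk + nr_sectors <= chunk_sectors;
}
/*
 * Example: chunk_sectors = 128 (64 KiB), sector = 1000, nr_sectors = 8:
 * offset_in_chunk = 1000 & 127 = 104, and 104 + 8 = 112 <= 128, so it fits.
 */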
5435 static void add_bio_to_retry(struct bio *bi,struct r5conf *conf) in add_bio_to_retry() argument
5439 spin_lock_irqsave(&conf->device_lock, flags); in add_bio_to_retry()
5441 bi->bi_next = conf->retry_read_aligned_list; in add_bio_to_retry()
5442 conf->retry_read_aligned_list = bi; in add_bio_to_retry()
5444 spin_unlock_irqrestore(&conf->device_lock, flags); in add_bio_to_retry()
5445 md_wakeup_thread(conf->mddev->thread); in add_bio_to_retry()
5448 static struct bio *remove_bio_from_retry(struct r5conf *conf, in remove_bio_from_retry() argument
5453 bi = conf->retry_read_aligned; in remove_bio_from_retry()
5455 *offset = conf->retry_read_offset; in remove_bio_from_retry()
5456 conf->retry_read_aligned = NULL; in remove_bio_from_retry()
5459 bi = conf->retry_read_aligned_list; in remove_bio_from_retry()
5461 conf->retry_read_aligned_list = bi->bi_next; in remove_bio_from_retry()
5462 bi->bi_next = NULL; in remove_bio_from_retry()
5477 struct bio *raid_bi = bi->bi_private; in raid5_align_endio()
5478 struct md_rdev *rdev = (void *)raid_bi->bi_next; in raid5_align_endio()
5479 struct mddev *mddev = rdev->mddev; in raid5_align_endio()
5480 struct r5conf *conf = mddev->private; in raid5_align_endio() local
5481 blk_status_t error = bi->bi_status; in raid5_align_endio()
5484 raid_bi->bi_next = NULL; in raid5_align_endio()
5485 rdev_dec_pending(rdev, conf->mddev); in raid5_align_endio()
5489 if (atomic_dec_and_test(&conf->active_aligned_reads)) in raid5_align_endio()
5490 wake_up(&conf->wait_for_quiescent); in raid5_align_endio()
5496 add_bio_to_retry(raid_bi, conf); in raid5_align_endio()
5501 struct r5conf *conf = mddev->private; in raid5_read_one_chunk() local
5513 sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0, in raid5_read_one_chunk()
5518 if (r5c_big_stripe_cached(conf, sector)) in raid5_read_one_chunk()
5521 rdev = rcu_dereference(conf->disks[dd_idx].replacement); in raid5_read_one_chunk()
5522 if (!rdev || test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5523 rdev->recovery_offset < end_sector) { in raid5_read_one_chunk()
5524 rdev = rcu_dereference(conf->disks[dd_idx].rdev); in raid5_read_one_chunk()
5527 if (test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5528 !(test_bit(In_sync, &rdev->flags) || in raid5_read_one_chunk()
5529 rdev->recovery_offset >= end_sector)) in raid5_read_one_chunk()
5533 atomic_inc(&rdev->nr_pending); in raid5_read_one_chunk()
5543 raid_bio->bi_next = (void *)rdev; in raid5_read_one_chunk()
5545 align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, in raid5_read_one_chunk()
5546 &mddev->bio_set); in raid5_read_one_chunk()
5547 align_bio->bi_end_io = raid5_align_endio; in raid5_read_one_chunk()
5548 align_bio->bi_private = raid_bio; in raid5_read_one_chunk()
5549 align_bio->bi_iter.bi_sector = sector; in raid5_read_one_chunk()
5551 /* No reshape active, so we can trust rdev->data_offset */ in raid5_read_one_chunk()
5552 align_bio->bi_iter.bi_sector += rdev->data_offset; in raid5_read_one_chunk()
5555 if (conf->quiesce == 0) { in raid5_read_one_chunk()
5556 atomic_inc(&conf->active_aligned_reads); in raid5_read_one_chunk()
5560 if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) { in raid5_read_one_chunk()
5564 if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads)) in raid5_read_one_chunk()
5565 wake_up(&conf->wait_for_quiescent); in raid5_read_one_chunk()
5566 spin_lock_irq(&conf->device_lock); in raid5_read_one_chunk()
5567 wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, in raid5_read_one_chunk()
5568 conf->device_lock); in raid5_read_one_chunk()
5569 atomic_inc(&conf->active_aligned_reads); in raid5_read_one_chunk()
5570 spin_unlock_irq(&conf->device_lock); in raid5_read_one_chunk()
5573 if (mddev->gendisk) in raid5_read_one_chunk()
5574 trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), in raid5_read_one_chunk()
5575 raid_bio->bi_iter.bi_sector); in raid5_read_one_chunk()
5587 sector_t sector = raid_bio->bi_iter.bi_sector; in chunk_aligned_read()
5588 unsigned chunk_sects = mddev->chunk_sectors; in chunk_aligned_read()
5589 unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); in chunk_aligned_read()
5592 struct r5conf *conf = mddev->private; in chunk_aligned_read() local
5593 split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split); in chunk_aligned_read()
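/*
 * Editor's note: chunk_aligned_read() only splits when the bio crosses a
 * chunk boundary; 'sectors' is the room left in the current chunk.  For
 * example, with chunk_sects = 128 and sector = 1000 the first split covers
 * 128 - (1000 & 127) = 24 sectors and the remainder is resubmitted.
 */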
5605 /* __get_priority_stripe - get the next stripe to process
5615 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) in __get_priority_stripe() argument
5616 __must_hold(&conf->device_lock) in __get_priority_stripe()
5621 bool second_try = !r5c_is_writeback(conf->log) && in __get_priority_stripe()
5622 !r5l_log_disk_error(conf); in __get_priority_stripe()
5623 bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || in __get_priority_stripe()
5624 r5l_log_disk_error(conf); in __get_priority_stripe()
5629 if (conf->worker_cnt_per_group == 0) { in __get_priority_stripe()
5630 handle_list = try_loprio ? &conf->loprio_list : in __get_priority_stripe()
5631 &conf->handle_list; in __get_priority_stripe()
5633 handle_list = try_loprio ? &conf->worker_groups[group].loprio_list : in __get_priority_stripe()
5634 &conf->worker_groups[group].handle_list; in __get_priority_stripe()
5635 wg = &conf->worker_groups[group]; in __get_priority_stripe()
5638 for (i = 0; i < conf->group_cnt; i++) { in __get_priority_stripe()
5639 handle_list = try_loprio ? &conf->worker_groups[i].loprio_list : in __get_priority_stripe()
5640 &conf->worker_groups[i].handle_list; in __get_priority_stripe()
5641 wg = &conf->worker_groups[i]; in __get_priority_stripe()
5650 list_empty(&conf->hold_list) ? "empty" : "busy", in __get_priority_stripe()
5651 atomic_read(&conf->pending_full_writes), conf->bypass_count); in __get_priority_stripe()
5654 sh = list_entry(handle_list->next, typeof(*sh), lru); in __get_priority_stripe()
5656 if (list_empty(&conf->hold_list)) in __get_priority_stripe()
5657 conf->bypass_count = 0; in __get_priority_stripe()
5658 else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { in __get_priority_stripe()
5659 if (conf->hold_list.next == conf->last_hold) in __get_priority_stripe()
5660 conf->bypass_count++; in __get_priority_stripe()
5662 conf->last_hold = conf->hold_list.next; in __get_priority_stripe()
5663 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5664 if (conf->bypass_count < 0) in __get_priority_stripe()
5665 conf->bypass_count = 0; in __get_priority_stripe()
5668 } else if (!list_empty(&conf->hold_list) && in __get_priority_stripe()
5669 ((conf->bypass_threshold && in __get_priority_stripe()
5670 conf->bypass_count > conf->bypass_threshold) || in __get_priority_stripe()
5671 atomic_read(&conf->pending_full_writes) == 0)) { in __get_priority_stripe()
5673 list_for_each_entry(tmp, &conf->hold_list, lru) { in __get_priority_stripe()
5674 if (conf->worker_cnt_per_group == 0 || in __get_priority_stripe()
5676 !cpu_online(tmp->cpu) || in __get_priority_stripe()
5677 cpu_to_group(tmp->cpu) == group) { in __get_priority_stripe()
5684 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5685 if (conf->bypass_count < 0) in __get_priority_stripe()
5686 conf->bypass_count = 0; in __get_priority_stripe()
5700 wg->stripes_cnt--; in __get_priority_stripe()
5701 sh->group = NULL; in __get_priority_stripe()
5703 list_del_init(&sh->lru); in __get_priority_stripe()
5704 BUG_ON(atomic_inc_return(&sh->count) != 1); in __get_priority_stripe()
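/*
 * Editor's note: __get_priority_stripe() normally serves handle_list (or the
 * low-priority loprio_list when the write-back log is tight or failed), but
 * it keeps a bypass_count of how often the preread hold_list was passed over
 * while work sat there.  Once bypass_count exceeds bypass_threshold, or there
 * are no pending full-stripe writes, a stripe is taken from hold_list instead
 * so delayed pre-read stripes cannot be starved indefinitely.
 */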
5719 struct mddev *mddev = cb->cb.data; in raid5_unplug()
5720 struct r5conf *conf = mddev->private; in raid5_unplug() local
5724 if (cb->list.next && !list_empty(&cb->list)) { in raid5_unplug()
5725 spin_lock_irq(&conf->device_lock); in raid5_unplug()
5726 while (!list_empty(&cb->list)) { in raid5_unplug()
5727 sh = list_first_entry(&cb->list, struct stripe_head, lru); in raid5_unplug()
5728 list_del_init(&sh->lru); in raid5_unplug()
5735 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); in raid5_unplug()
5740 hash = sh->hash_lock_index; in raid5_unplug()
5741 __release_stripe(conf, sh, &cb->temp_inactive_list[hash]); in raid5_unplug()
5744 spin_unlock_irq(&conf->device_lock); in raid5_unplug()
5746 release_inactive_stripe_list(conf, cb->temp_inactive_list, in raid5_unplug()
5748 if (mddev->queue) in raid5_unplug()
5749 trace_block_unplug(mddev->queue, cnt, !from_schedule); in raid5_unplug()
5768 if (cb->list.next == NULL) { in release_stripe_plug()
5770 INIT_LIST_HEAD(&cb->list); in release_stripe_plug()
5772 INIT_LIST_HEAD(cb->temp_inactive_list + i); in release_stripe_plug()
5775 if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) in release_stripe_plug()
5776 list_add_tail(&sh->lru, &cb->list); in release_stripe_plug()
5783 struct r5conf *conf = mddev->private; in make_discard_request() local
5789 if (WARN_ON_ONCE(bi->bi_opf & REQ_NOWAIT)) in make_discard_request()
5792 if (mddev->reshape_position != MaxSector) in make_discard_request()
5796 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in make_discard_request()
5799 bi->bi_next = NULL; in make_discard_request()
5801 stripe_sectors = conf->chunk_sectors * in make_discard_request()
5802 (conf->raid_disks - conf->max_degraded); in make_discard_request()
5807 logical_sector *= conf->chunk_sectors; in make_discard_request()
5808 last_sector *= conf->chunk_sectors; in make_discard_request()
5811 logical_sector += RAID5_STRIPE_SECTORS(conf)) { in make_discard_request()
5815 sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0); in make_discard_request()
5816 prepare_to_wait(&conf->wait_for_overlap, &w, in make_discard_request()
5818 set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5819 if (test_bit(STRIPE_SYNCING, &sh->state)) { in make_discard_request()
5824 clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5825 spin_lock_irq(&sh->stripe_lock); in make_discard_request()
5826 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5827 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5829 if (sh->dev[d].towrite || sh->dev[d].toread) { in make_discard_request()
5830 set_bit(R5_Overlap, &sh->dev[d].flags); in make_discard_request()
5831 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5837 set_bit(STRIPE_DISCARD, &sh->state); in make_discard_request()
5838 finish_wait(&conf->wait_for_overlap, &w); in make_discard_request()
5839 sh->overwrite_disks = 0; in make_discard_request()
5840 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5841 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5843 sh->dev[d].towrite = bi; in make_discard_request()
5844 set_bit(R5_OVERWRITE, &sh->dev[d].flags); in make_discard_request()
5847 sh->overwrite_disks++; in make_discard_request()
5849 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5850 if (conf->mddev->bitmap) { in make_discard_request()
5852 d < conf->raid_disks - conf->max_degraded; in make_discard_request()
5854 md_bitmap_startwrite(mddev->bitmap, in make_discard_request()
5855 sh->sector, in make_discard_request()
5856 RAID5_STRIPE_SECTORS(conf), in make_discard_request()
5858 sh->bm_seq = conf->seq_flush + 1; in make_discard_request()
5859 set_bit(STRIPE_BIT_DELAY, &sh->state); in make_discard_request()
5862 set_bit(STRIPE_HANDLE, &sh->state); in make_discard_request()
5863 clear_bit(STRIPE_DELAYED, &sh->state); in make_discard_request()
5864 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in make_discard_request()
5865 atomic_inc(&conf->preread_active_stripes); in make_discard_request()
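/*
 * Editor's note: make_discard_request() only discards whole stripes.  A full
 * stripe here spans chunk_sectors on every data disk, i.e.
 * chunk_sectors * (raid_disks - max_degraded) logical sectors, and the
 * requested range is shrunk to full-stripe boundaries before any stripe
 * heads are touched.  A minimal stand-alone sketch of that trimming follows;
 * the helper name and rounding style are the editor's, not the kernel's.
 */
#include <stdint.h>

struct discard_range {
	uint64_t start;	/* first logical sector to discard */
	uint64_t end;	/* first logical sector NOT to discard */
};

/* Trim [start, end) to whole multiples of stripe_sectors. */
static struct discard_range trim_to_full_stripes(uint64_t start, uint64_t end,
						 uint64_t stripe_sectors)
{
	struct discard_range r;

	r.start = ((start + stripe_sectors - 1) / stripe_sectors) * stripe_sectors;
	r.end = (end / stripe_sectors) * stripe_sectors;
	if (r.end < r.start)
		r.end = r.start;	/* nothing left to discard */
	return r;
}
/*
 * Example: 4 data disks, chunk_sectors = 128 -> stripe_sectors = 512; a
 * discard of sectors [600, 2000) is trimmed to [1024, 1536), one full stripe.
 */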
5875 return mddev->reshape_backwards ? sector < reshape_sector : in ahead_of_reshape()
5882 return mddev->reshape_backwards ? max < reshape_sector : in range_ahead_of_reshape()
5886 static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf, in stripe_ahead_of_reshape() argument
5893 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in stripe_ahead_of_reshape()
5894 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in stripe_ahead_of_reshape()
5897 min_sector = min(min_sector, sh->dev[dd_idx].sector); in stripe_ahead_of_reshape()
5898 max_sector = max(max_sector, sh->dev[dd_idx].sector); in stripe_ahead_of_reshape()
5901 spin_lock_irq(&conf->device_lock); in stripe_ahead_of_reshape()
5904 conf->reshape_progress)) in stripe_ahead_of_reshape()
5908 spin_unlock_irq(&conf->device_lock); in stripe_ahead_of_reshape()
5913 static int add_all_stripe_bios(struct r5conf *conf, in add_all_stripe_bios() argument
5920 spin_lock_irq(&sh->stripe_lock); in add_all_stripe_bios()
5922 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in add_all_stripe_bios()
5923 struct r5dev *dev = &sh->dev[dd_idx]; in add_all_stripe_bios()
5925 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in add_all_stripe_bios()
5928 if (dev->sector < ctx->first_sector || in add_all_stripe_bios()
5929 dev->sector >= ctx->last_sector) in add_all_stripe_bios()
5933 set_bit(R5_Overlap, &dev->flags); in add_all_stripe_bios()
5942 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in add_all_stripe_bios()
5943 struct r5dev *dev = &sh->dev[dd_idx]; in add_all_stripe_bios()
5945 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in add_all_stripe_bios()
5948 if (dev->sector < ctx->first_sector || in add_all_stripe_bios()
5949 dev->sector >= ctx->last_sector) in add_all_stripe_bios()
5953 clear_bit((dev->sector - ctx->first_sector) >> in add_all_stripe_bios()
5954 RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do); in add_all_stripe_bios()
5958 spin_unlock_irq(&sh->stripe_lock); in add_all_stripe_bios()
5964 return test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && in reshape_inprogress()
5965 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && in reshape_inprogress()
5966 !test_bit(MD_RECOVERY_DONE, &mddev->recovery) && in reshape_inprogress()
5967 !test_bit(MD_RECOVERY_INTR, &mddev->recovery); in reshape_inprogress()
5976 struct r5conf *conf, struct stripe_request_ctx *ctx, in make_stripe_request() argument
5986 seq = read_seqcount_begin(&conf->gen_lock); in make_stripe_request()
5988 if (unlikely(conf->reshape_progress != MaxSector)) { in make_stripe_request()
5992 * possible to see a half-updated value in make_stripe_request()
5998 spin_lock_irq(&conf->device_lock); in make_stripe_request()
6000 conf->reshape_progress)) { in make_stripe_request()
6004 conf->reshape_safe)) { in make_stripe_request()
6005 spin_unlock_irq(&conf->device_lock); in make_stripe_request()
6010 spin_unlock_irq(&conf->device_lock); in make_stripe_request()
6013 new_sector = raid5_compute_sector(conf, logical_sector, previous, in make_stripe_request()
6020 if (bi->bi_opf & REQ_RAHEAD) in make_stripe_request()
6022 sh = raid5_get_active_stripe(conf, ctx, new_sector, flags); in make_stripe_request()
6024 /* cannot get a stripe, just give up */ in make_stripe_request()
6025 bi->bi_status = BLK_STS_IOERR; in make_stripe_request()
6030 stripe_ahead_of_reshape(mddev, conf, sh)) { in make_stripe_request()
6043 if (read_seqcount_retry(&conf->gen_lock, seq)) { in make_stripe_request()
6049 if (test_bit(STRIPE_EXPANDING, &sh->state) || in make_stripe_request()
6050 !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) { in make_stripe_request()
6055 md_wakeup_thread(mddev->thread); in make_stripe_request()
6061 stripe_add_to_batch_list(conf, sh, ctx->batch_last); in make_stripe_request()
6062 if (ctx->batch_last) in make_stripe_request()
6063 raid5_release_stripe(ctx->batch_last); in make_stripe_request()
6064 atomic_inc(&sh->count); in make_stripe_request()
6065 ctx->batch_last = sh; in make_stripe_request()
6068 if (ctx->do_flush) { in make_stripe_request()
6069 set_bit(STRIPE_R5C_PREFLUSH, &sh->state); in make_stripe_request()
6071 ctx->do_flush = false; in make_stripe_request()
6074 set_bit(STRIPE_HANDLE, &sh->state); in make_stripe_request()
6075 clear_bit(STRIPE_DELAYED, &sh->state); in make_stripe_request()
6076 if ((!sh->batch_head || sh == sh->batch_head) && in make_stripe_request()
6077 (bi->bi_opf & REQ_SYNC) && in make_stripe_request()
6078 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in make_stripe_request()
6079 atomic_inc(&conf->preread_active_stripes); in make_stripe_request()
6089 bi->bi_status = BLK_STS_IOERR; in make_stripe_request()
6102 static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf, in raid5_bio_lowest_chunk_sector() argument
6105 int sectors_per_chunk = conf->chunk_sectors; in raid5_bio_lowest_chunk_sector()
6106 int raid_disks = conf->raid_disks; in raid5_bio_lowest_chunk_sector()
6110 sector_t r_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in raid5_bio_lowest_chunk_sector()
6114 sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh); in raid5_bio_lowest_chunk_sector()
6116 if (sectors_per_chunk - chunk_offset >= bio_sectors(bi)) in raid5_bio_lowest_chunk_sector()
6127 return r_sector + sectors_per_chunk - chunk_offset; in raid5_bio_lowest_chunk_sector()
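/*
 * Editor's note: raid5_bio_lowest_chunk_sector() returns, within the bio,
 * the stripe-aligned logical sector that has the lowest offset inside its
 * chunk: the bio's own aligned start if everything fits in one chunk,
 * otherwise the start of the following chunk (r_sector + sectors_per_chunk -
 * chunk_offset).  raid5_make_request() starts its per-stripe loop from that
 * sector, presumably so the stripes it grabs line up for batching.
 */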
6133 struct r5conf *conf = mddev->private; in raid5_make_request() local
6140 if (unlikely(bi->bi_opf & REQ_PREFLUSH)) { in raid5_make_request()
6141 int ret = log_handle_flush_request(conf, bi); in raid5_make_request()
6145 if (ret == -ENODEV) { in raid5_make_request()
6149 /* ret == -EAGAIN, fallback */ in raid5_make_request()
6154 ctx.do_flush = bi->bi_opf & REQ_PREFLUSH; in raid5_make_request()
6164 if (rw == READ && mddev->degraded == 0 && in raid5_make_request()
6165 mddev->reshape_position == MaxSector) { in raid5_make_request()
6177 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in raid5_make_request()
6180 bi->bi_next = NULL; in raid5_make_request()
6182 stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx.last_sector - logical_sector, in raid5_make_request()
6183 RAID5_STRIPE_SECTORS(conf)); in raid5_make_request()
6187 bi->bi_iter.bi_sector, ctx.last_sector); in raid5_make_request()
6190 if ((bi->bi_opf & REQ_NOWAIT) && in raid5_make_request()
6191 (conf->reshape_progress != MaxSector) && in raid5_make_request()
6192 !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) && in raid5_make_request()
6193 ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) { in raid5_make_request()
6208 if (likely(conf->reshape_progress == MaxSector)) in raid5_make_request()
6209 logical_sector = raid5_bio_lowest_chunk_sector(conf, bi); in raid5_make_request()
6210 s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf); in raid5_make_request()
6212 add_wait_queue(&conf->wait_for_overlap, &wait); in raid5_make_request()
6214 res = make_stripe_request(mddev, conf, &ctx, logical_sector, in raid5_make_request()
6245 (s << RAID5_STRIPE_SHIFT(conf)); in raid5_make_request()
6247 remove_wait_queue(&conf->wait_for_overlap, &wait); in raid5_make_request()
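/*
 * Editor's note: raid5_make_request() tracks outstanding work in
 * ctx.sectors_to_do, a bitmap with one bit per RAID5_STRIPE_SECTORS-sized
 * slice of the bio.  make_stripe_request() clears bits as stripes accept the
 * bio, returns a status telling the caller whether to continue, wait and
 * retry, or fail the bio, and chains consecutive stripes through
 * ctx.batch_last so they can be released and handled together.
 */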
6271 struct r5conf *conf = mddev->private; in reshape_request() local
6275 int raid_disks = conf->previous_raid_disks; in reshape_request()
6276 int data_disks = raid_disks - conf->max_degraded; in reshape_request()
6277 int new_data_disks = conf->raid_disks - conf->max_degraded; in reshape_request()
6288 if (mddev->reshape_backwards && in reshape_request()
6289 conf->reshape_progress < raid5_size(mddev, 0, 0)) { in reshape_request()
6291 - conf->reshape_progress; in reshape_request()
6292 } else if (mddev->reshape_backwards && in reshape_request()
6293 conf->reshape_progress == MaxSector) { in reshape_request()
6296 } else if (!mddev->reshape_backwards && in reshape_request()
6297 conf->reshape_progress > 0) in reshape_request()
6298 sector_nr = conf->reshape_progress; in reshape_request()
6301 mddev->curr_resync_completed = sector_nr; in reshape_request()
6302 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
6314 reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors); in reshape_request()
6317 * the data about to be copied would over-write the source of in reshape_request()
6322 writepos = conf->reshape_progress; in reshape_request()
6324 readpos = conf->reshape_progress; in reshape_request()
6326 safepos = conf->reshape_safe; in reshape_request()
6328 if (mddev->reshape_backwards) { in reshape_request()
6332 writepos -= reshape_sectors; in reshape_request()
6337 /* readpos and safepos are worst-case calculations. in reshape_request()
6341 readpos -= min_t(sector_t, reshape_sectors, readpos); in reshape_request()
6342 safepos -= min_t(sector_t, reshape_sectors, safepos); in reshape_request()
6348 if (mddev->reshape_backwards) { in reshape_request()
6349 if (WARN_ON(conf->reshape_progress == 0)) in reshape_request()
6353 if (WARN_ON((mddev->dev_sectors & in reshape_request()
6354 ~((sector_t)reshape_sectors - 1)) - in reshape_request()
6355 reshape_sectors - stripe_addr != sector_nr)) in reshape_request()
6372 * ensure safety in the face of a crash - that must be done by userspace in reshape_request()
6384 if (conf->min_offset_diff < 0) { in reshape_request()
6385 safepos += -conf->min_offset_diff; in reshape_request()
6386 readpos += -conf->min_offset_diff; in reshape_request()
6388 writepos += conf->min_offset_diff; in reshape_request()
6390 if ((mddev->reshape_backwards in reshape_request()
6393 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { in reshape_request()
6395 wait_event(conf->wait_for_overlap, in reshape_request()
6396 atomic_read(&conf->reshape_stripes)==0 in reshape_request()
6397 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6398 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6400 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6401 mddev->curr_resync_completed = sector_nr; in reshape_request()
6402 if (!mddev->reshape_backwards) in reshape_request()
6405 if (rdev->raid_disk >= 0 && in reshape_request()
6406 !test_bit(Journal, &rdev->flags) && in reshape_request()
6407 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6408 rdev->recovery_offset < sector_nr) in reshape_request()
6409 rdev->recovery_offset = sector_nr; in reshape_request()
6411 conf->reshape_checkpoint = jiffies; in reshape_request()
6412 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6413 md_wakeup_thread(mddev->thread); in reshape_request()
6414 wait_event(mddev->sb_wait, mddev->sb_flags == 0 || in reshape_request()
6415 test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6416 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6418 spin_lock_irq(&conf->device_lock); in reshape_request()
6419 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6420 spin_unlock_irq(&conf->device_lock); in reshape_request()
6421 wake_up(&conf->wait_for_overlap); in reshape_request()
6422 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
6426 for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) { in reshape_request()
6429 sh = raid5_get_active_stripe(conf, NULL, stripe_addr+i, in reshape_request()
6431 set_bit(STRIPE_EXPANDING, &sh->state); in reshape_request()
6432 atomic_inc(&conf->reshape_stripes); in reshape_request()
6436 for (j=sh->disks; j--;) { in reshape_request()
6438 if (j == sh->pd_idx) in reshape_request()
6440 if (conf->level == 6 && in reshape_request()
6441 j == sh->qd_idx) in reshape_request()
6448 memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf)); in reshape_request()
6449 set_bit(R5_Expanded, &sh->dev[j].flags); in reshape_request()
6450 set_bit(R5_UPTODATE, &sh->dev[j].flags); in reshape_request()
6453 set_bit(STRIPE_EXPAND_READY, &sh->state); in reshape_request()
6454 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6456 list_add(&sh->lru, &stripes); in reshape_request()
6458 spin_lock_irq(&conf->device_lock); in reshape_request()
6459 if (mddev->reshape_backwards) in reshape_request()
6460 conf->reshape_progress -= reshape_sectors * new_data_disks; in reshape_request()
6462 conf->reshape_progress += reshape_sectors * new_data_disks; in reshape_request()
6463 spin_unlock_irq(&conf->device_lock); in reshape_request()
6470 raid5_compute_sector(conf, stripe_addr*(new_data_disks), in reshape_request()
6473 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) in reshape_request()
6474 * new_data_disks - 1), in reshape_request()
6476 if (last_sector >= mddev->dev_sectors) in reshape_request()
6477 last_sector = mddev->dev_sectors - 1; in reshape_request()
6479 sh = raid5_get_active_stripe(conf, NULL, first_sector, in reshape_request()
6481 set_bit(STRIPE_EXPAND_SOURCE, &sh->state); in reshape_request()
6482 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6484 first_sector += RAID5_STRIPE_SECTORS(conf); in reshape_request()
6491 list_del_init(&sh->lru); in reshape_request()
6500 if (mddev->curr_resync_completed > mddev->resync_max || in reshape_request()
6501 (sector_nr - mddev->curr_resync_completed) * 2 in reshape_request()
6502 >= mddev->resync_max - mddev->curr_resync_completed) { in reshape_request()
6504 wait_event(conf->wait_for_overlap, in reshape_request()
6505 atomic_read(&conf->reshape_stripes) == 0 in reshape_request()
6506 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6507 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6509 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6510 mddev->curr_resync_completed = sector_nr; in reshape_request()
6511 if (!mddev->reshape_backwards) in reshape_request()
6514 if (rdev->raid_disk >= 0 && in reshape_request()
6515 !test_bit(Journal, &rdev->flags) && in reshape_request()
6516 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6517 rdev->recovery_offset < sector_nr) in reshape_request()
6518 rdev->recovery_offset = sector_nr; in reshape_request()
6519 conf->reshape_checkpoint = jiffies; in reshape_request()
6520 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6521 md_wakeup_thread(mddev->thread); in reshape_request()
6522 wait_event(mddev->sb_wait, in reshape_request()
6523 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) in reshape_request()
6524 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6525 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6527 spin_lock_irq(&conf->device_lock); in reshape_request()
6528 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6529 spin_unlock_irq(&conf->device_lock); in reshape_request()
6530 wake_up(&conf->wait_for_overlap); in reshape_request()
6531 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
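/*
 * Editor's note: reshape_request() advances the reshape window one
 * 'reshape_sectors' step at a time.  Before data may be written at its new
 * location, a crash must not be able to lose the only copy, so whenever the
 * write position crosses the last recorded safe position the code waits for
 * in-flight reshape stripes to drain, updates mddev->reshape_position,
 * forces a superblock write, and only then moves conf->reshape_safe forward
 * and carries on filling expand-destination stripes.
 */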
6540 struct r5conf *conf = mddev->private; in raid5_sync_request() local
6542 sector_t max_sector = mddev->dev_sectors; in raid5_sync_request()
6550 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { in raid5_sync_request()
6551 end_reshape(conf); in raid5_sync_request()
6555 if (mddev->curr_resync < max_sector) /* aborted */ in raid5_sync_request()
6556 md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, in raid5_sync_request()
6559 conf->fullsync = 0; in raid5_sync_request()
6560 md_bitmap_close_sync(mddev->bitmap); in raid5_sync_request()
6566 wait_event(conf->wait_for_overlap, conf->quiesce != 2); in raid5_sync_request()
6568 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) in raid5_sync_request()
6581 if (mddev->degraded >= conf->max_degraded && in raid5_sync_request()
6582 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { in raid5_sync_request()
6583 sector_t rv = mddev->dev_sectors - sector_nr; in raid5_sync_request()
6587 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && in raid5_sync_request()
6588 !conf->fullsync && in raid5_sync_request()
6589 !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && in raid5_sync_request()
6590 sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { in raid5_sync_request()
6592 do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf)); in raid5_sync_request()
6595 return sync_blocks * RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
6598 md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); in raid5_sync_request()
6600 sh = raid5_get_active_stripe(conf, NULL, sector_nr, in raid5_sync_request()
6603 sh = raid5_get_active_stripe(conf, NULL, sector_nr, 0); in raid5_sync_request()
6614 for (i = 0; i < conf->raid_disks; i++) { in raid5_sync_request()
6615 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_sync_request()
6617 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) in raid5_sync_request()
6622 md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); in raid5_sync_request()
6624 set_bit(STRIPE_SYNC_REQUESTED, &sh->state); in raid5_sync_request()
6625 set_bit(STRIPE_HANDLE, &sh->state); in raid5_sync_request()
6629 return RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
6632 static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, in retry_aligned_read() argument
6637 * We cannot pre-allocate enough stripe_heads, so retry the read one in retry_aligned_read()
6640 * stripe at a time, recording in ->bi_hw_segments how many are done. in retry_aligned_read()
6651 logical_sector = raid_bio->bi_iter.bi_sector & in retry_aligned_read()
6652 ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in retry_aligned_read()
6653 sector = raid5_compute_sector(conf, logical_sector, in retry_aligned_read()
6658 logical_sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6659 sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6666 sh = raid5_get_active_stripe(conf, NULL, sector, in retry_aligned_read()
6669 /* failed to get a stripe - must wait */ in retry_aligned_read()
6670 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6671 conf->retry_read_offset = scnt; in retry_aligned_read()
6677 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6678 conf->retry_read_offset = scnt; in retry_aligned_read()
6682 set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); in retry_aligned_read()
6690 if (atomic_dec_and_test(&conf->active_aligned_reads)) in retry_aligned_read()
6691 wake_up(&conf->wait_for_quiescent); in retry_aligned_read()
6695 static int handle_active_stripes(struct r5conf *conf, int group, in handle_active_stripes() argument
6698 __must_hold(&conf->device_lock) in handle_active_stripes()
6705 (sh = __get_priority_stripe(conf, group)) != NULL) in handle_active_stripes()
6713 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6714 log_flush_stripe_to_raid(conf); in handle_active_stripes()
6715 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6720 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6722 release_inactive_stripe_list(conf, temp_inactive_list, in handle_active_stripes()
6725 r5l_flush_stripe_to_raid(conf->log); in handle_active_stripes()
6727 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6733 log_write_stripe_run(conf); in handle_active_stripes()
6737 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6739 hash = batch[i]->hash_lock_index; in handle_active_stripes()
6740 __release_stripe(conf, batch[i], &temp_inactive_list[hash]); in handle_active_stripes()
6748 struct r5worker_group *group = worker->group; in raid5_do_work()
6749 struct r5conf *conf = group->conf; in raid5_do_work() local
6750 struct mddev *mddev = conf->mddev; in raid5_do_work()
6751 int group_id = group - conf->worker_groups; in raid5_do_work()
6759 spin_lock_irq(&conf->device_lock); in raid5_do_work()
6763 released = release_stripe_list(conf, worker->temp_inactive_list); in raid5_do_work()
6765 batch_size = handle_active_stripes(conf, group_id, worker, in raid5_do_work()
6766 worker->temp_inactive_list); in raid5_do_work()
6767 worker->working = false; in raid5_do_work()
6771 wait_event_lock_irq(mddev->sb_wait, in raid5_do_work()
6772 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), in raid5_do_work()
6773 conf->device_lock); in raid5_do_work()
6777 spin_unlock_irq(&conf->device_lock); in raid5_do_work()
6779 flush_deferred_bios(conf); in raid5_do_work()
6781 r5l_flush_stripe_to_raid(conf->log); in raid5_do_work()
6786 pr_debug("--- raid5worker inactive\n"); in raid5_do_work()
6798 struct mddev *mddev = thread->mddev; in raid5d()
6799 struct r5conf *conf = mddev->private; in raid5d() local
6809 spin_lock_irq(&conf->device_lock); in raid5d()
6815 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) in raid5d()
6818 released = release_stripe_list(conf, conf->temp_inactive_list); in raid5d()
6820 clear_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6823 !list_empty(&conf->bitmap_list)) { in raid5d()
6825 conf->seq_flush++; in raid5d()
6826 spin_unlock_irq(&conf->device_lock); in raid5d()
6827 md_bitmap_unplug(mddev->bitmap); in raid5d()
6828 spin_lock_irq(&conf->device_lock); in raid5d()
6829 conf->seq_write = conf->seq_flush; in raid5d()
6830 activate_bit_delay(conf, conf->temp_inactive_list); in raid5d()
6832 raid5_activate_delayed(conf); in raid5d()
6834 while ((bio = remove_bio_from_retry(conf, &offset))) { in raid5d()
6836 spin_unlock_irq(&conf->device_lock); in raid5d()
6837 ok = retry_aligned_read(conf, bio, offset); in raid5d()
6838 spin_lock_irq(&conf->device_lock); in raid5d()
6844 batch_size = handle_active_stripes(conf, ANY_GROUP, NULL, in raid5d()
6845 conf->temp_inactive_list); in raid5d()
6850 if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) { in raid5d()
6851 spin_unlock_irq(&conf->device_lock); in raid5d()
6853 spin_lock_irq(&conf->device_lock); in raid5d()
6858 spin_unlock_irq(&conf->device_lock); in raid5d()
6859 if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && in raid5d()
6860 mutex_trylock(&conf->cache_size_mutex)) { in raid5d()
6861 grow_one_stripe(conf, __GFP_NOWARN); in raid5d()
6865 set_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6866 mutex_unlock(&conf->cache_size_mutex); in raid5d()
6869 flush_deferred_bios(conf); in raid5d()
6871 r5l_flush_stripe_to_raid(conf->log); in raid5d()
6876 pr_debug("--- raid5d inactive\n"); in raid5d()
6882 struct r5conf *conf; in raid5_show_stripe_cache_size() local
6884 spin_lock(&mddev->lock); in raid5_show_stripe_cache_size()
6885 conf = mddev->private; in raid5_show_stripe_cache_size()
6886 if (conf) in raid5_show_stripe_cache_size()
6887 ret = sprintf(page, "%d\n", conf->min_nr_stripes); in raid5_show_stripe_cache_size()
6888 spin_unlock(&mddev->lock); in raid5_show_stripe_cache_size()
6896 struct r5conf *conf = mddev->private; in raid5_set_cache_size() local
6899 return -EINVAL; in raid5_set_cache_size()
6901 WRITE_ONCE(conf->min_nr_stripes, size); in raid5_set_cache_size()
6902 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6903 while (size < conf->max_nr_stripes && in raid5_set_cache_size()
6904 drop_one_stripe(conf)) in raid5_set_cache_size()
6906 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
6910 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6911 while (size > conf->max_nr_stripes) in raid5_set_cache_size()
6912 if (!grow_one_stripe(conf, GFP_KERNEL)) { in raid5_set_cache_size()
6913 WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); in raid5_set_cache_size()
6914 result = -ENOMEM; in raid5_set_cache_size()
6917 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
6926 struct r5conf *conf; in raid5_store_stripe_cache_size() local
6931 return -EINVAL; in raid5_store_stripe_cache_size()
6933 return -EINVAL; in raid5_store_stripe_cache_size()
6937 conf = mddev->private; in raid5_store_stripe_cache_size()
6938 if (!conf) in raid5_store_stripe_cache_size()
6939 err = -ENODEV; in raid5_store_stripe_cache_size()
6955 struct r5conf *conf = mddev->private; in raid5_show_rmw_level() local
6956 if (conf) in raid5_show_rmw_level()
6957 return sprintf(page, "%d\n", conf->rmw_level); in raid5_show_rmw_level()
6965 struct r5conf *conf = mddev->private; in raid5_store_rmw_level() local
6968 if (!conf) in raid5_store_rmw_level()
6969 return -ENODEV; in raid5_store_rmw_level()
6972 return -EINVAL; in raid5_store_rmw_level()
6975 return -EINVAL; in raid5_store_rmw_level()
6978 return -EINVAL; in raid5_store_rmw_level()
6983 return -EINVAL; in raid5_store_rmw_level()
6985 conf->rmw_level = new; in raid5_store_rmw_level()
6997 struct r5conf *conf; in raid5_show_stripe_size() local
7000 spin_lock(&mddev->lock); in raid5_show_stripe_size()
7001 conf = mddev->private; in raid5_show_stripe_size()
7002 if (conf) in raid5_show_stripe_size()
7003 ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf)); in raid5_show_stripe_size()
7004 spin_unlock(&mddev->lock); in raid5_show_stripe_size()
7012 struct r5conf *conf; in raid5_store_stripe_size() local
7018 return -EINVAL; in raid5_store_stripe_size()
7020 return -EINVAL; in raid5_store_stripe_size()
7030 return -EINVAL; in raid5_store_stripe_size()
7036 conf = mddev->private; in raid5_store_stripe_size()
7037 if (!conf) { in raid5_store_stripe_size()
7038 err = -ENODEV; in raid5_store_stripe_size()
7042 if (new == conf->stripe_size) in raid5_store_stripe_size()
7046 conf->stripe_size, new); in raid5_store_stripe_size()
7048 if (mddev->sync_thread || in raid5_store_stripe_size()
7049 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || in raid5_store_stripe_size()
7050 mddev->reshape_position != MaxSector || in raid5_store_stripe_size()
7051 mddev->sysfs_active) { in raid5_store_stripe_size()
7052 err = -EBUSY; in raid5_store_stripe_size()
7057 mutex_lock(&conf->cache_size_mutex); in raid5_store_stripe_size()
7058 size = conf->max_nr_stripes; in raid5_store_stripe_size()
7060 shrink_stripes(conf); in raid5_store_stripe_size()
7062 conf->stripe_size = new; in raid5_store_stripe_size()
7063 conf->stripe_shift = ilog2(new) - 9; in raid5_store_stripe_size()
7064 conf->stripe_sectors = new >> 9; in raid5_store_stripe_size()
7065 if (grow_stripes(conf, size)) { in raid5_store_stripe_size()
7068 err = -ENOMEM; in raid5_store_stripe_size()
7070 	mutex_unlock(&conf->cache_size_mutex); in raid5_store_stripe_size()
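/*
 * Illustrative, compile-only userspace sketch (not part of raid5.c): it
 * mirrors the conversion above from a byte-sized stripe_size to
 * stripe_shift (relative to 512-byte sectors) and stripe_sectors.
 * The input value 16384 is only an example.
 */
#include <stdio.h>

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int new = 16384;                       /* example stripe_size in bytes */
	unsigned int stripe_shift = ilog2_u32(new) - 9; /* 14 - 9 = 5 */
	unsigned int stripe_sectors = new >> 9;         /* 32 sectors of 512 bytes */

	printf("stripe_size=%u shift=%u sectors=%u\n",
	       new, stripe_shift, stripe_sectors);
	return 0;
}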
7092 struct r5conf *conf; in raid5_show_preread_threshold() local
7094 spin_lock(&mddev->lock); in raid5_show_preread_threshold()
7095 conf = mddev->private; in raid5_show_preread_threshold()
7096 if (conf) in raid5_show_preread_threshold()
7097 ret = sprintf(page, "%d\n", conf->bypass_threshold); in raid5_show_preread_threshold()
7098 spin_unlock(&mddev->lock); in raid5_show_preread_threshold()
7105 struct r5conf *conf; in raid5_store_preread_threshold() local
7110 return -EINVAL; in raid5_store_preread_threshold()
7112 return -EINVAL; in raid5_store_preread_threshold()
7117 conf = mddev->private; in raid5_store_preread_threshold()
7118 if (!conf) in raid5_store_preread_threshold()
7119 err = -ENODEV; in raid5_store_preread_threshold()
7120 else if (new > conf->min_nr_stripes) in raid5_store_preread_threshold()
7121 err = -EINVAL; in raid5_store_preread_threshold()
7123 conf->bypass_threshold = new; in raid5_store_preread_threshold()
7137 struct r5conf *conf; in raid5_show_skip_copy() local
7139 spin_lock(&mddev->lock); in raid5_show_skip_copy()
7140 conf = mddev->private; in raid5_show_skip_copy()
7141 if (conf) in raid5_show_skip_copy()
7142 ret = sprintf(page, "%d\n", conf->skip_copy); in raid5_show_skip_copy()
7143 spin_unlock(&mddev->lock); in raid5_show_skip_copy()
7150 struct r5conf *conf; in raid5_store_skip_copy() local
7155 return -EINVAL; in raid5_store_skip_copy()
7157 return -EINVAL; in raid5_store_skip_copy()
7163 conf = mddev->private; in raid5_store_skip_copy()
7164 if (!conf) in raid5_store_skip_copy()
7165 err = -ENODEV; in raid5_store_skip_copy()
7166 else if (new != conf->skip_copy) { in raid5_store_skip_copy()
7167 struct request_queue *q = mddev->queue; in raid5_store_skip_copy()
7170 conf->skip_copy = new; in raid5_store_skip_copy()
7189 struct r5conf *conf = mddev->private; in stripe_cache_active_show() local
7190 if (conf) in stripe_cache_active_show()
7191 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); in stripe_cache_active_show()
7202 struct r5conf *conf; in raid5_show_group_thread_cnt() local
7204 spin_lock(&mddev->lock); in raid5_show_group_thread_cnt()
7205 conf = mddev->private; in raid5_show_group_thread_cnt()
7206 if (conf) in raid5_show_group_thread_cnt()
7207 ret = sprintf(page, "%d\n", conf->worker_cnt_per_group); in raid5_show_group_thread_cnt()
7208 spin_unlock(&mddev->lock); in raid5_show_group_thread_cnt()
7212 static int alloc_thread_groups(struct r5conf *conf, int cnt,
7218 struct r5conf *conf; in raid5_store_group_thread_cnt() local
7225 return -EINVAL; in raid5_store_group_thread_cnt()
7227 return -EINVAL; in raid5_store_group_thread_cnt()
7230 return -EINVAL; in raid5_store_group_thread_cnt()
7235 conf = mddev->private; in raid5_store_group_thread_cnt()
7236 if (!conf) in raid5_store_group_thread_cnt()
7237 err = -ENODEV; in raid5_store_group_thread_cnt()
7238 else if (new != conf->worker_cnt_per_group) { in raid5_store_group_thread_cnt()
7241 old_groups = conf->worker_groups; in raid5_store_group_thread_cnt()
7245 err = alloc_thread_groups(conf, new, &group_cnt, &new_groups); in raid5_store_group_thread_cnt()
7247 spin_lock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
7248 conf->group_cnt = group_cnt; in raid5_store_group_thread_cnt()
7249 conf->worker_cnt_per_group = new; in raid5_store_group_thread_cnt()
7250 conf->worker_groups = new_groups; in raid5_store_group_thread_cnt()
7251 spin_unlock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
7286 static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt, in alloc_thread_groups() argument
7306 return -ENOMEM; in alloc_thread_groups()
7313 INIT_LIST_HEAD(&group->handle_list); in alloc_thread_groups()
7314 INIT_LIST_HEAD(&group->loprio_list); in alloc_thread_groups()
7315 group->conf = conf; in alloc_thread_groups()
7316 group->workers = workers + i * cnt; in alloc_thread_groups()
7319 struct r5worker *worker = group->workers + j; in alloc_thread_groups()
7320 worker->group = group; in alloc_thread_groups()
7321 INIT_WORK(&worker->work, raid5_do_work); in alloc_thread_groups()
7324 INIT_LIST_HEAD(worker->temp_inactive_list + k); in alloc_thread_groups()
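/*
 * Illustrative userspace sketch (struct names and the allocator below
 * are stand-ins, not the kernel's).  It only demonstrates the indexing
 * used above: one flat array of group_cnt * cnt workers is allocated
 * once, and group i owns the contiguous slice starting at
 * workers + i * cnt, which is why freeing worker_groups[0].workers
 * releases every worker.
 */
#include <stdlib.h>

struct demo_group;

struct demo_worker {
	struct demo_group *group;
};

struct demo_group {
	struct demo_worker *workers;
};

static int demo_alloc_groups(int group_cnt, int cnt,
			     struct demo_group **groups_out)
{
	struct demo_group *groups = calloc(group_cnt, sizeof(*groups));
	/* one shared array; reachable later through groups[0].workers */
	struct demo_worker *workers = calloc((size_t)group_cnt * cnt,
					     sizeof(*workers));
	int i, j;

	if (!groups || !workers) {
		free(groups);
		free(workers);
		return -1;
	}
	for (i = 0; i < group_cnt; i++) {
		groups[i].workers = workers + i * cnt;	/* group i's slice */
		for (j = 0; j < cnt; j++)
			groups[i].workers[j].group = &groups[i];
	}
	*groups_out = groups;
	return 0;
}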
7331 static void free_thread_groups(struct r5conf *conf) in free_thread_groups() argument
7333 if (conf->worker_groups) in free_thread_groups()
7334 kfree(conf->worker_groups[0].workers); in free_thread_groups()
7335 kfree(conf->worker_groups); in free_thread_groups()
7336 conf->worker_groups = NULL; in free_thread_groups()
7342 struct r5conf *conf = mddev->private; in raid5_size() local
7345 sectors = mddev->dev_sectors; in raid5_size()
7348 raid_disks = min(conf->raid_disks, conf->previous_raid_disks); in raid5_size()
7350 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_size()
7351 sectors &= ~((sector_t)conf->prev_chunk_sectors - 1); in raid5_size()
7352 return sectors * (raid_disks - conf->max_degraded); in raid5_size()
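/*
 * Illustrative userspace sketch of the size calculation above, with
 * assumed example numbers (a hypothetical 4-device RAID5, so
 * max_degraded = 1, and 512 KiB chunks, i.e. chunk_sectors = 1024):
 * per-device sectors are rounded down to a whole chunk, then multiplied
 * by the number of data devices.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dev_sectors = 1953525168ULL;	/* example ~931.5 GiB device */
	uint64_t chunk_sectors = 1024;		/* 512 KiB chunk */
	int raid_disks = 4, max_degraded = 1;	/* RAID5 */

	dev_sectors &= ~(chunk_sectors - 1);	/* round down to a chunk boundary */
	printf("array capacity: %llu sectors\n",
	       (unsigned long long)(dev_sectors * (raid_disks - max_degraded)));
	return 0;
}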
7355 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in free_scratch_buffer() argument
7357 safe_put_page(percpu->spare_page); in free_scratch_buffer()
7358 percpu->spare_page = NULL; in free_scratch_buffer()
7359 kvfree(percpu->scribble); in free_scratch_buffer()
7360 percpu->scribble = NULL; in free_scratch_buffer()
7363 static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in alloc_scratch_buffer() argument
7365 if (conf->level == 6 && !percpu->spare_page) { in alloc_scratch_buffer()
7366 percpu->spare_page = alloc_page(GFP_KERNEL); in alloc_scratch_buffer()
7367 if (!percpu->spare_page) in alloc_scratch_buffer()
7368 return -ENOMEM; in alloc_scratch_buffer()
7372 max(conf->raid_disks, in alloc_scratch_buffer()
7373 conf->previous_raid_disks), in alloc_scratch_buffer()
7374 max(conf->chunk_sectors, in alloc_scratch_buffer()
7375 conf->prev_chunk_sectors) in alloc_scratch_buffer()
7376 / RAID5_STRIPE_SECTORS(conf))) { in alloc_scratch_buffer()
7377 free_scratch_buffer(conf, percpu); in alloc_scratch_buffer()
7378 return -ENOMEM; in alloc_scratch_buffer()
7381 local_lock_init(&percpu->lock); in alloc_scratch_buffer()
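/*
 * Illustrative sketch of the two scribble-buffer dimensions computed
 * above (the allocator internals are not shown in this listing).  With
 * an assumed 6-disk array, 512 KiB chunks (1024 sectors) and 4 KiB
 * stripes (8 sectors), the per-CPU scratch space is sized for 6 disks
 * and 1024 / 8 = 128 stripes per chunk.
 */
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	int raid_disks = 6, previous_raid_disks = 6;	/* example values */
	int chunk_sectors = 1024, prev_chunk_sectors = 1024;
	int stripe_sectors = 8;				/* 4 KiB / 512 */

	int num = MAX(raid_disks, previous_raid_disks);
	int cnt = MAX(chunk_sectors, prev_chunk_sectors) / stripe_sectors;

	printf("scribble sized for %d disks x %d stripes per chunk\n", num, cnt);
	return 0;
}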
7387 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_dead() local
7389 free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); in raid456_cpu_dead()
7393 static void raid5_free_percpu(struct r5conf *conf) in raid5_free_percpu() argument
7395 if (!conf->percpu) in raid5_free_percpu()
7398 cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_free_percpu()
7399 free_percpu(conf->percpu); in raid5_free_percpu()
7402 static void free_conf(struct r5conf *conf) in free_conf() argument
7406 log_exit(conf); in free_conf()
7408 unregister_shrinker(&conf->shrinker); in free_conf()
7409 free_thread_groups(conf); in free_conf()
7410 shrink_stripes(conf); in free_conf()
7411 raid5_free_percpu(conf); in free_conf()
7412 for (i = 0; i < conf->pool_size; i++) in free_conf()
7413 if (conf->disks[i].extra_page) in free_conf()
7414 put_page(conf->disks[i].extra_page); in free_conf()
7415 kfree(conf->disks); in free_conf()
7416 bioset_exit(&conf->bio_split); in free_conf()
7417 kfree(conf->stripe_hashtbl); in free_conf()
7418 kfree(conf->pending_data); in free_conf()
7419 kfree(conf); in free_conf()
7424 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_up_prepare() local
7425 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); in raid456_cpu_up_prepare()
7427 if (alloc_scratch_buffer(conf, percpu)) { in raid456_cpu_up_prepare()
7430 return -ENOMEM; in raid456_cpu_up_prepare()
7435 static int raid5_alloc_percpu(struct r5conf *conf) in raid5_alloc_percpu() argument
7439 conf->percpu = alloc_percpu(struct raid5_percpu); in raid5_alloc_percpu()
7440 if (!conf->percpu) in raid5_alloc_percpu()
7441 return -ENOMEM; in raid5_alloc_percpu()
7443 err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_alloc_percpu()
7445 conf->scribble_disks = max(conf->raid_disks, in raid5_alloc_percpu()
7446 conf->previous_raid_disks); in raid5_alloc_percpu()
7447 conf->scribble_sectors = max(conf->chunk_sectors, in raid5_alloc_percpu()
7448 conf->prev_chunk_sectors); in raid5_alloc_percpu()
7456 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_scan() local
7459 if (mutex_trylock(&conf->cache_size_mutex)) { in raid5_cache_scan()
7461 while (ret < sc->nr_to_scan && in raid5_cache_scan()
7462 conf->max_nr_stripes > conf->min_nr_stripes) { in raid5_cache_scan()
7463 if (drop_one_stripe(conf) == 0) { in raid5_cache_scan()
7469 mutex_unlock(&conf->cache_size_mutex); in raid5_cache_scan()
7477 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_count() local
7478 int max_stripes = READ_ONCE(conf->max_nr_stripes); in raid5_cache_count()
7479 int min_stripes = READ_ONCE(conf->min_nr_stripes); in raid5_cache_count()
7484 return max_stripes - min_stripes; in raid5_cache_count()
7489 struct r5conf *conf; in setup_conf() local
7497 int ret = -ENOMEM; in setup_conf()
7499 if (mddev->new_level != 5 in setup_conf()
7500 && mddev->new_level != 4 in setup_conf()
7501 && mddev->new_level != 6) { in setup_conf()
7503 mdname(mddev), mddev->new_level); in setup_conf()
7504 return ERR_PTR(-EIO); in setup_conf()
7506 if ((mddev->new_level == 5 in setup_conf()
7507 && !algorithm_valid_raid5(mddev->new_layout)) || in setup_conf()
7508 (mddev->new_level == 6 in setup_conf()
7509 && !algorithm_valid_raid6(mddev->new_layout))) { in setup_conf()
7511 mdname(mddev), mddev->new_layout); in setup_conf()
7512 return ERR_PTR(-EIO); in setup_conf()
7514 if (mddev->new_level == 6 && mddev->raid_disks < 4) { in setup_conf()
7516 mdname(mddev), mddev->raid_disks); in setup_conf()
7517 return ERR_PTR(-EINVAL); in setup_conf()
7520 if (!mddev->new_chunk_sectors || in setup_conf()
7521 (mddev->new_chunk_sectors << 9) % PAGE_SIZE || in setup_conf()
7522 !is_power_of_2(mddev->new_chunk_sectors)) { in setup_conf()
7524 mdname(mddev), mddev->new_chunk_sectors << 9); in setup_conf()
7525 return ERR_PTR(-EINVAL); in setup_conf()
7528 conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); in setup_conf()
7529 if (conf == NULL) in setup_conf()
7533 conf->stripe_size = DEFAULT_STRIPE_SIZE; in setup_conf()
7534 conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9; in setup_conf()
7535 conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9; in setup_conf()
7537 INIT_LIST_HEAD(&conf->free_list); in setup_conf()
7538 INIT_LIST_HEAD(&conf->pending_list); in setup_conf()
7539 conf->pending_data = kcalloc(PENDING_IO_MAX, in setup_conf()
7542 if (!conf->pending_data) in setup_conf()
7545 list_add(&conf->pending_data[i].sibling, &conf->free_list); in setup_conf()
7546 	/* Don't enable multi-threading by default */ in setup_conf()
7547 if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) { in setup_conf()
7548 conf->group_cnt = group_cnt; in setup_conf()
7549 conf->worker_cnt_per_group = 0; in setup_conf()
7550 conf->worker_groups = new_group; in setup_conf()
7553 spin_lock_init(&conf->device_lock); in setup_conf()
7554 seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock); in setup_conf()
7555 mutex_init(&conf->cache_size_mutex); in setup_conf()
7557 init_waitqueue_head(&conf->wait_for_quiescent); in setup_conf()
7558 init_waitqueue_head(&conf->wait_for_stripe); in setup_conf()
7559 init_waitqueue_head(&conf->wait_for_overlap); in setup_conf()
7560 INIT_LIST_HEAD(&conf->handle_list); in setup_conf()
7561 INIT_LIST_HEAD(&conf->loprio_list); in setup_conf()
7562 INIT_LIST_HEAD(&conf->hold_list); in setup_conf()
7563 INIT_LIST_HEAD(&conf->delayed_list); in setup_conf()
7564 INIT_LIST_HEAD(&conf->bitmap_list); in setup_conf()
7565 init_llist_head(&conf->released_stripes); in setup_conf()
7566 atomic_set(&conf->active_stripes, 0); in setup_conf()
7567 atomic_set(&conf->preread_active_stripes, 0); in setup_conf()
7568 atomic_set(&conf->active_aligned_reads, 0); in setup_conf()
7569 spin_lock_init(&conf->pending_bios_lock); in setup_conf()
7570 conf->batch_bio_dispatch = true; in setup_conf()
7572 if (test_bit(Journal, &rdev->flags)) in setup_conf()
7574 if (bdev_nonrot(rdev->bdev)) { in setup_conf()
7575 conf->batch_bio_dispatch = false; in setup_conf()
7580 conf->bypass_threshold = BYPASS_THRESHOLD; in setup_conf()
7581 conf->recovery_disabled = mddev->recovery_disabled - 1; in setup_conf()
7583 conf->raid_disks = mddev->raid_disks; in setup_conf()
7584 if (mddev->reshape_position == MaxSector) in setup_conf()
7585 conf->previous_raid_disks = mddev->raid_disks; in setup_conf()
7587 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; in setup_conf()
7588 max_disks = max(conf->raid_disks, conf->previous_raid_disks); in setup_conf()
7590 conf->disks = kcalloc(max_disks, sizeof(struct disk_info), in setup_conf()
7593 if (!conf->disks) in setup_conf()
7597 conf->disks[i].extra_page = alloc_page(GFP_KERNEL); in setup_conf()
7598 if (!conf->disks[i].extra_page) in setup_conf()
7602 ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); in setup_conf()
7605 conf->mddev = mddev; in setup_conf()
7607 ret = -ENOMEM; in setup_conf()
7608 conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL); in setup_conf()
7609 if (!conf->stripe_hashtbl) in setup_conf()
7617 spin_lock_init(conf->hash_locks); in setup_conf()
7619 spin_lock_init(conf->hash_locks + i); in setup_conf()
7622 INIT_LIST_HEAD(conf->inactive_list + i); in setup_conf()
7625 INIT_LIST_HEAD(conf->temp_inactive_list + i); in setup_conf()
7627 atomic_set(&conf->r5c_cached_full_stripes, 0); in setup_conf()
7628 INIT_LIST_HEAD(&conf->r5c_full_stripe_list); in setup_conf()
7629 atomic_set(&conf->r5c_cached_partial_stripes, 0); in setup_conf()
7630 INIT_LIST_HEAD(&conf->r5c_partial_stripe_list); in setup_conf()
7631 atomic_set(&conf->r5c_flushing_full_stripes, 0); in setup_conf()
7632 atomic_set(&conf->r5c_flushing_partial_stripes, 0); in setup_conf()
7634 conf->level = mddev->new_level; in setup_conf()
7635 conf->chunk_sectors = mddev->new_chunk_sectors; in setup_conf()
7636 ret = raid5_alloc_percpu(conf); in setup_conf()
7642 ret = -EIO; in setup_conf()
7644 raid_disk = rdev->raid_disk; in setup_conf()
7646 || raid_disk < 0 || test_bit(Journal, &rdev->flags)) in setup_conf()
7648 disk = conf->disks + raid_disk; in setup_conf()
7650 if (test_bit(Replacement, &rdev->flags)) { in setup_conf()
7651 if (disk->replacement) in setup_conf()
7653 RCU_INIT_POINTER(disk->replacement, rdev); in setup_conf()
7655 if (disk->rdev) in setup_conf()
7657 RCU_INIT_POINTER(disk->rdev, rdev); in setup_conf()
7660 if (test_bit(In_sync, &rdev->flags)) { in setup_conf()
7662 mdname(mddev), rdev->bdev, raid_disk); in setup_conf()
7663 } else if (rdev->saved_raid_disk != raid_disk) in setup_conf()
7665 conf->fullsync = 1; in setup_conf()
7668 conf->level = mddev->new_level; in setup_conf()
7669 if (conf->level == 6) { in setup_conf()
7670 conf->max_degraded = 2; in setup_conf()
7672 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7674 conf->rmw_level = PARITY_DISABLE_RMW; in setup_conf()
7676 conf->max_degraded = 1; in setup_conf()
7677 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7679 conf->algorithm = mddev->new_layout; in setup_conf()
7680 conf->reshape_progress = mddev->reshape_position; in setup_conf()
7681 if (conf->reshape_progress != MaxSector) { in setup_conf()
7682 conf->prev_chunk_sectors = mddev->chunk_sectors; in setup_conf()
7683 conf->prev_algo = mddev->layout; in setup_conf()
7685 conf->prev_chunk_sectors = conf->chunk_sectors; in setup_conf()
7686 conf->prev_algo = conf->algorithm; in setup_conf()
7689 conf->min_nr_stripes = NR_STRIPES; in setup_conf()
7690 if (mddev->reshape_position != MaxSector) { in setup_conf()
7692 ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4, in setup_conf()
7693 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4); in setup_conf()
7694 conf->min_nr_stripes = max(NR_STRIPES, stripes); in setup_conf()
7695 if (conf->min_nr_stripes != NR_STRIPES) in setup_conf()
7697 mdname(mddev), conf->min_nr_stripes); in setup_conf()
7699 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + in setup_conf()
7701 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); in setup_conf()
7702 if (grow_stripes(conf, conf->min_nr_stripes)) { in setup_conf()
7705 ret = -ENOMEM; in setup_conf()
7714 conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; in setup_conf()
7715 conf->shrinker.scan_objects = raid5_cache_scan; in setup_conf()
7716 conf->shrinker.count_objects = raid5_cache_count; in setup_conf()
7717 conf->shrinker.batch = 128; in setup_conf()
7718 conf->shrinker.flags = 0; in setup_conf()
7719 ret = register_shrinker(&conf->shrinker, "md-raid5:%s", mdname(mddev)); in setup_conf()
7726 sprintf(pers_name, "raid%d", mddev->new_level); in setup_conf()
7727 rcu_assign_pointer(conf->thread, in setup_conf()
7729 if (!conf->thread) { in setup_conf()
7732 ret = -ENOMEM; in setup_conf()
7736 return conf; in setup_conf()
7739 if (conf) in setup_conf()
7740 free_conf(conf); in setup_conf()
7752 if (raid_disk >= raid_disks - max_degraded) in only_parity()
7757 raid_disk == raid_disks - 1) in only_parity()
7764 if (raid_disk == raid_disks - 1) in only_parity()
7770 static void raid5_set_io_opt(struct r5conf *conf) in raid5_set_io_opt() argument
7772 blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * in raid5_set_io_opt()
7773 (conf->raid_disks - conf->max_degraded)); in raid5_set_io_opt()
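/*
 * Illustrative arithmetic for the optimal-I/O hint set above, using
 * assumed example values: 512 KiB chunks (chunk_sectors = 1024) on a
 * 4-device RAID5 (max_degraded = 1) give an io_opt of one full data
 * stripe, 3 * 512 KiB = 1536 KiB.
 */
#include <stdio.h>

int main(void)
{
	unsigned int chunk_sectors = 1024;	/* 512 KiB */
	int raid_disks = 4, max_degraded = 1;

	unsigned int io_opt = (chunk_sectors << 9) * (raid_disks - max_degraded);
	printf("io_opt = %u bytes\n", io_opt);	/* 1572864 */
	return 0;
}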
7778 struct r5conf *conf; in raid5_run() local
7788 return -ENOMEM; in raid5_run()
7790 if (mddev->recovery_cp != MaxSector) in raid5_run()
7791 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", in raid5_run()
7797 if (test_bit(Journal, &rdev->flags)) { in raid5_run()
7801 if (rdev->raid_disk < 0) in raid5_run()
7803 diff = (rdev->new_data_offset - rdev->data_offset); in raid5_run()
7807 } else if (mddev->reshape_backwards && in raid5_run()
7810 else if (!mddev->reshape_backwards && in raid5_run()
7815 if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) && in raid5_run()
7816 (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { in raid5_run()
7819 return -EINVAL; in raid5_run()
7822 if (mddev->reshape_position != MaxSector) { in raid5_run()
7832 * critical areas read-only and backed up. It will start in raid5_run()
7833 * the array in read-only mode, so we check for that. in raid5_run()
7837 int max_degraded = (mddev->level == 6 ? 2 : 1); in raid5_run()
7842 pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", in raid5_run()
7844 return -EINVAL; in raid5_run()
7847 if (mddev->new_level != mddev->level) { in raid5_run()
7848 pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", in raid5_run()
7850 return -EINVAL; in raid5_run()
7852 old_disks = mddev->raid_disks - mddev->delta_disks; in raid5_run()
7853 /* reshape_position must be on a new-stripe boundary, and one in raid5_run()
7860 here_new = mddev->reshape_position; in raid5_run()
7861 chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors); in raid5_run()
7862 new_data_disks = mddev->raid_disks - max_degraded; in raid5_run()
7866 return -EINVAL; in raid5_run()
7870 here_old = mddev->reshape_position; in raid5_run()
7871 sector_div(here_old, chunk_sectors * (old_disks-max_degraded)); in raid5_run()
7874 if (mddev->delta_disks == 0) { in raid5_run()
7875 /* We cannot be sure it is safe to start an in-place in raid5_run()
7876 * reshape. It is only safe if user-space is monitoring in raid5_run()
7882 if (abs(min_offset_diff) >= mddev->chunk_sectors && in raid5_run()
7883 abs(min_offset_diff) >= mddev->new_chunk_sectors) in raid5_run()
7884 /* not really in-place - so OK */; in raid5_run()
7885 else if (mddev->ro == 0) { in raid5_run()
7886 pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", in raid5_run()
7888 return -EINVAL; in raid5_run()
7890 } else if (mddev->reshape_backwards in raid5_run()
7894 here_old * chunk_sectors + (-min_offset_diff))) { in raid5_run()
7895 /* Reading from the same stripe as writing to - bad */ in raid5_run()
7896 pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", in raid5_run()
7898 return -EINVAL; in raid5_run()
7903 BUG_ON(mddev->level != mddev->new_level); in raid5_run()
7904 BUG_ON(mddev->layout != mddev->new_layout); in raid5_run()
7905 BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); in raid5_run()
7906 BUG_ON(mddev->delta_disks != 0); in raid5_run()
7909 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && in raid5_run()
7910 test_bit(MD_HAS_PPL, &mddev->flags)) { in raid5_run()
7911 pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", in raid5_run()
7913 clear_bit(MD_HAS_PPL, &mddev->flags); in raid5_run()
7914 clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); in raid5_run()
7917 if (mddev->private == NULL) in raid5_run()
7918 conf = setup_conf(mddev); in raid5_run()
7920 conf = mddev->private; in raid5_run()
7922 if (IS_ERR(conf)) in raid5_run()
7923 return PTR_ERR(conf); in raid5_run()
7925 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { in raid5_run()
7929 mddev->ro = 1; in raid5_run()
7930 set_disk_ro(mddev->gendisk, 1); in raid5_run()
7931 } else if (mddev->recovery_cp == MaxSector) in raid5_run()
7932 set_bit(MD_JOURNAL_CLEAN, &mddev->flags); in raid5_run()
7935 conf->min_offset_diff = min_offset_diff; in raid5_run()
7936 rcu_assign_pointer(mddev->thread, conf->thread); in raid5_run()
7937 rcu_assign_pointer(conf->thread, NULL); in raid5_run()
7938 mddev->private = conf; in raid5_run()
7940 for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; in raid5_run()
7942 rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); in raid5_run()
7943 if (!rdev && conf->disks[i].replacement) { in raid5_run()
7946 conf->disks[i].replacement); in raid5_run()
7947 conf->disks[i].replacement = NULL; in raid5_run()
7948 clear_bit(Replacement, &rdev->flags); in raid5_run()
7949 rcu_assign_pointer(conf->disks[i].rdev, rdev); in raid5_run()
7953 if (rcu_access_pointer(conf->disks[i].replacement) && in raid5_run()
7954 conf->reshape_progress != MaxSector) { in raid5_run()
7959 if (test_bit(In_sync, &rdev->flags)) in raid5_run()
7961 		/* This disk is not fully in-sync. However if it in raid5_run()
7970 if (mddev->major_version == 0 && in raid5_run()
7971 mddev->minor_version > 90) in raid5_run()
7972 rdev->recovery_offset = reshape_offset; in raid5_run()
7974 if (rdev->recovery_offset < reshape_offset) { in raid5_run()
7976 if (!only_parity(rdev->raid_disk, in raid5_run()
7977 conf->algorithm, in raid5_run()
7978 conf->raid_disks, in raid5_run()
7979 conf->max_degraded)) in raid5_run()
7982 if (!only_parity(rdev->raid_disk, in raid5_run()
7983 conf->prev_algo, in raid5_run()
7984 conf->previous_raid_disks, in raid5_run()
7985 conf->max_degraded)) in raid5_run()
7993 mddev->degraded = raid5_calc_degraded(conf); in raid5_run()
7995 if (has_failed(conf)) { in raid5_run()
7997 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_run()
8002 mddev->dev_sectors &= ~((sector_t)mddev->chunk_sectors - 1); in raid5_run()
8003 mddev->resync_max_sectors = mddev->dev_sectors; in raid5_run()
8005 if (mddev->degraded > dirty_parity_disks && in raid5_run()
8006 mddev->recovery_cp != MaxSector) { in raid5_run()
8007 if (test_bit(MD_HAS_PPL, &mddev->flags)) in raid5_run()
8010 else if (mddev->ok_start_degraded) in raid5_run()
8011 pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n", in raid5_run()
8021 mdname(mddev), conf->level, in raid5_run()
8022 mddev->raid_disks-mddev->degraded, mddev->raid_disks, in raid5_run()
8023 mddev->new_layout); in raid5_run()
8025 print_raid5_conf(conf); in raid5_run()
8027 if (conf->reshape_progress != MaxSector) { in raid5_run()
8028 conf->reshape_safe = conf->reshape_progress; in raid5_run()
8029 atomic_set(&conf->reshape_stripes, 0); in raid5_run()
8030 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_run()
8031 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_run()
8032 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_run()
8033 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_run()
8034 rcu_assign_pointer(mddev->sync_thread, in raid5_run()
8036 if (!mddev->sync_thread) in raid5_run()
8041 if (mddev->to_remove == &raid5_attrs_group) in raid5_run()
8042 mddev->to_remove = NULL; in raid5_run()
8043 else if (mddev->kobj.sd && in raid5_run()
8044 sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) in raid5_run()
8049 if (mddev->queue) { in raid5_run()
8051 /* read-ahead size must cover two whole stripes, which in raid5_run()
8055 int data_disks = conf->previous_raid_disks - conf->max_degraded; in raid5_run()
8057 ((mddev->chunk_sectors << 9) / PAGE_SIZE); in raid5_run()
8059 chunk_size = mddev->chunk_sectors << 9; in raid5_run()
8060 blk_queue_io_min(mddev->queue, chunk_size); in raid5_run()
8061 raid5_set_io_opt(conf); in raid5_run()
8062 mddev->queue->limits.raid_partial_stripes_expensive = 1; in raid5_run()
8069 mddev->queue->limits.discard_granularity = stripe; in raid5_run()
8071 blk_queue_max_write_zeroes_sectors(mddev->queue, 0); in raid5_run()
8074 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
8075 rdev->data_offset << 9); in raid5_run()
8076 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
8077 rdev->new_data_offset << 9); in raid5_run()
8096 mddev->queue->limits.max_discard_sectors < (stripe >> 9) || in raid5_run()
8097 mddev->queue->limits.discard_granularity < stripe) in raid5_run()
8098 blk_queue_max_discard_sectors(mddev->queue, 0); in raid5_run()
8104 blk_queue_max_hw_sectors(mddev->queue, in raid5_run()
8105 RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf)); in raid5_run()
8108 blk_queue_max_segments(mddev->queue, USHRT_MAX); in raid5_run()
8111 if (log_init(conf, journal_dev, raid5_has_ppl(conf))) in raid5_run()
8116 md_unregister_thread(mddev, &mddev->thread); in raid5_run()
8117 print_raid5_conf(conf); in raid5_run()
8118 free_conf(conf); in raid5_run()
8119 mddev->private = NULL; in raid5_run()
8121 return -EIO; in raid5_run()
8126 struct r5conf *conf = priv; in raid5_free() local
8128 free_conf(conf); in raid5_free()
8129 mddev->to_remove = &raid5_attrs_group; in raid5_free()
8134 struct r5conf *conf = mddev->private; in raid5_status() local
8137 seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, in raid5_status()
8138 conf->chunk_sectors / 2, mddev->layout); in raid5_status()
8139 	seq_printf(seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); in raid5_status()
8141 for (i = 0; i < conf->raid_disks; i++) { in raid5_status()
8142 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_status()
8143 		seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); in raid5_status()
8149 static void print_raid5_conf(struct r5conf *conf) in print_raid5_conf() argument
8154 pr_debug("RAID conf printout:\n"); in print_raid5_conf()
8155 if (!conf) { in print_raid5_conf()
8156 pr_debug("(conf==NULL)\n"); in print_raid5_conf()
8159 pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level, in print_raid5_conf()
8160 conf->raid_disks, in print_raid5_conf()
8161 conf->raid_disks - conf->mddev->degraded); in print_raid5_conf()
8164 for (i = 0; i < conf->raid_disks; i++) { in print_raid5_conf()
8165 rdev = rcu_dereference(conf->disks[i].rdev); in print_raid5_conf()
8168 i, !test_bit(Faulty, &rdev->flags), in print_raid5_conf()
8169 rdev->bdev); in print_raid5_conf()
8177 struct r5conf *conf = mddev->private; in raid5_spare_active() local
8182 for (i = 0; i < conf->raid_disks; i++) { in raid5_spare_active()
8183 rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); in raid5_spare_active()
8185 conf->disks[i].replacement); in raid5_spare_active()
8187 && replacement->recovery_offset == MaxSector in raid5_spare_active()
8188 && !test_bit(Faulty, &replacement->flags) in raid5_spare_active()
8189 && !test_and_set_bit(In_sync, &replacement->flags)) { in raid5_spare_active()
8192 || !test_and_clear_bit(In_sync, &rdev->flags)) in raid5_spare_active()
8197 * and never re-added. in raid5_spare_active()
8199 set_bit(Faulty, &rdev->flags); in raid5_spare_active()
8201 rdev->sysfs_state); in raid5_spare_active()
8203 sysfs_notify_dirent_safe(replacement->sysfs_state); in raid5_spare_active()
8205 && rdev->recovery_offset == MaxSector in raid5_spare_active()
8206 && !test_bit(Faulty, &rdev->flags) in raid5_spare_active()
8207 && !test_and_set_bit(In_sync, &rdev->flags)) { in raid5_spare_active()
8209 sysfs_notify_dirent_safe(rdev->sysfs_state); in raid5_spare_active()
8212 spin_lock_irqsave(&conf->device_lock, flags); in raid5_spare_active()
8213 mddev->degraded = raid5_calc_degraded(conf); in raid5_spare_active()
8214 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_spare_active()
8215 print_raid5_conf(conf); in raid5_spare_active()
8221 struct r5conf *conf = mddev->private; in raid5_remove_disk() local
8223 int number = rdev->raid_disk; in raid5_remove_disk()
8228 print_raid5_conf(conf); in raid5_remove_disk()
8229 if (test_bit(Journal, &rdev->flags) && conf->log) { in raid5_remove_disk()
8236 if (atomic_read(&conf->active_stripes) || in raid5_remove_disk()
8237 atomic_read(&conf->r5c_cached_full_stripes) || in raid5_remove_disk()
8238 atomic_read(&conf->r5c_cached_partial_stripes)) { in raid5_remove_disk()
8239 return -EBUSY; in raid5_remove_disk()
8241 log_exit(conf); in raid5_remove_disk()
8244 if (unlikely(number >= conf->pool_size)) in raid5_remove_disk()
8246 p = conf->disks + number; in raid5_remove_disk()
8247 if (rdev == rcu_access_pointer(p->rdev)) in raid5_remove_disk()
8248 rdevp = &p->rdev; in raid5_remove_disk()
8249 else if (rdev == rcu_access_pointer(p->replacement)) in raid5_remove_disk()
8250 rdevp = &p->replacement; in raid5_remove_disk()
8254 if (number >= conf->raid_disks && in raid5_remove_disk()
8255 conf->reshape_progress == MaxSector) in raid5_remove_disk()
8256 clear_bit(In_sync, &rdev->flags); in raid5_remove_disk()
8258 if (test_bit(In_sync, &rdev->flags) || in raid5_remove_disk()
8259 atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
8260 err = -EBUSY; in raid5_remove_disk()
8263 /* Only remove non-faulty devices if recovery in raid5_remove_disk()
8266 if (!test_bit(Faulty, &rdev->flags) && in raid5_remove_disk()
8267 mddev->recovery_disabled != conf->recovery_disabled && in raid5_remove_disk()
8268 !has_failed(conf) && in raid5_remove_disk()
8269 (!rcu_access_pointer(p->replacement) || in raid5_remove_disk()
8270 rcu_access_pointer(p->replacement) == rdev) && in raid5_remove_disk()
8271 number < conf->raid_disks) { in raid5_remove_disk()
8272 err = -EBUSY; in raid5_remove_disk()
8276 if (!test_bit(RemoveSynchronized, &rdev->flags)) { in raid5_remove_disk()
8277 lockdep_assert_held(&mddev->reconfig_mutex); in raid5_remove_disk()
8279 if (atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
8281 err = -EBUSY; in raid5_remove_disk()
8286 err = log_modify(conf, rdev, false); in raid5_remove_disk()
8291 tmp = rcu_access_pointer(p->replacement); in raid5_remove_disk()
8294 rcu_assign_pointer(p->rdev, tmp); in raid5_remove_disk()
8295 clear_bit(Replacement, &tmp->flags); in raid5_remove_disk()
8297 		 * but will never see both as missing - if they are careful in raid5_remove_disk()
8299 rcu_assign_pointer(p->replacement, NULL); in raid5_remove_disk()
8302 err = log_modify(conf, tmp, true); in raid5_remove_disk()
8305 clear_bit(WantReplacement, &rdev->flags); in raid5_remove_disk()
8308 print_raid5_conf(conf); in raid5_remove_disk()
8314 struct r5conf *conf = mddev->private; in raid5_add_disk() local
8315 int ret, err = -EEXIST; in raid5_add_disk()
8320 int last = conf->raid_disks - 1; in raid5_add_disk()
8322 if (test_bit(Journal, &rdev->flags)) { in raid5_add_disk()
8323 if (conf->log) in raid5_add_disk()
8324 return -EBUSY; in raid5_add_disk()
8326 rdev->raid_disk = 0; in raid5_add_disk()
8331 ret = log_init(conf, rdev, false); in raid5_add_disk()
8335 ret = r5l_start(conf->log); in raid5_add_disk()
8341 if (mddev->recovery_disabled == conf->recovery_disabled) in raid5_add_disk()
8342 return -EBUSY; in raid5_add_disk()
8344 if (rdev->saved_raid_disk < 0 && has_failed(conf)) in raid5_add_disk()
8346 return -EINVAL; in raid5_add_disk()
8348 if (rdev->raid_disk >= 0) in raid5_add_disk()
8349 first = last = rdev->raid_disk; in raid5_add_disk()
8352 * find the disk ... but prefer rdev->saved_raid_disk in raid5_add_disk()
8355 if (rdev->saved_raid_disk >= first && in raid5_add_disk()
8356 rdev->saved_raid_disk <= last && in raid5_add_disk()
8357 conf->disks[rdev->saved_raid_disk].rdev == NULL) in raid5_add_disk()
8358 first = rdev->saved_raid_disk; in raid5_add_disk()
8361 p = conf->disks + disk; in raid5_add_disk()
8362 if (p->rdev == NULL) { in raid5_add_disk()
8363 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8364 rdev->raid_disk = disk; in raid5_add_disk()
8365 if (rdev->saved_raid_disk != disk) in raid5_add_disk()
8366 conf->fullsync = 1; in raid5_add_disk()
8367 rcu_assign_pointer(p->rdev, rdev); in raid5_add_disk()
8369 err = log_modify(conf, rdev, true); in raid5_add_disk()
8375 p = conf->disks + disk; in raid5_add_disk()
8376 tmp = rdev_mdlock_deref(mddev, p->rdev); in raid5_add_disk()
8377 if (test_bit(WantReplacement, &tmp->flags) && in raid5_add_disk()
8378 mddev->reshape_position == MaxSector && in raid5_add_disk()
8379 p->replacement == NULL) { in raid5_add_disk()
8380 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8381 set_bit(Replacement, &rdev->flags); in raid5_add_disk()
8382 rdev->raid_disk = disk; in raid5_add_disk()
8384 conf->fullsync = 1; in raid5_add_disk()
8385 rcu_assign_pointer(p->replacement, rdev); in raid5_add_disk()
8390 print_raid5_conf(conf); in raid5_add_disk()
8404 struct r5conf *conf = mddev->private; in raid5_resize() local
8406 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in raid5_resize()
8407 return -EINVAL; in raid5_resize()
8408 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_resize()
8409 newsize = raid5_size(mddev, sectors, mddev->raid_disks); in raid5_resize()
8410 if (mddev->external_size && in raid5_resize()
8411 mddev->array_sectors > newsize) in raid5_resize()
8412 return -EINVAL; in raid5_resize()
8413 if (mddev->bitmap) { in raid5_resize()
8414 int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); in raid5_resize()
8419 if (sectors > mddev->dev_sectors && in raid5_resize()
8420 mddev->recovery_cp > mddev->dev_sectors) { in raid5_resize()
8421 mddev->recovery_cp = mddev->dev_sectors; in raid5_resize()
8422 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); in raid5_resize()
8424 mddev->dev_sectors = sectors; in raid5_resize()
8425 mddev->resync_max_sectors = sectors; in raid5_resize()
8436 * If the chunk size is greater, user-space should request more in check_stripe_cache()
8439 struct r5conf *conf = mddev->private; in check_stripe_cache() local
8440 if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8441 > conf->min_nr_stripes || in check_stripe_cache()
8442 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8443 > conf->min_nr_stripes) { in check_stripe_cache()
8446 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) in check_stripe_cache()
8447 / RAID5_STRIPE_SIZE(conf))*4); in check_stripe_cache()
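/*
 * Illustrative worked example for the check above, with assumed values:
 * 512 KiB chunks (chunk_sectors = 1024) and a 4 KiB RAID5_STRIPE_SIZE
 * require the cache to hold at least (1024 << 9) / 4096 * 4 = 512
 * stripes in both the old and the new geometry, or the reshape is
 * refused until the user enlarges the stripe cache.
 */
#include <stdio.h>

int main(void)
{
	unsigned int chunk_sectors = 1024;	/* example: 512 KiB chunk */
	unsigned int stripe_size = 4096;	/* example RAID5_STRIPE_SIZE */

	unsigned int needed = ((chunk_sectors << 9) / stripe_size) * 4;
	printf("stripe cache must hold >= %u stripes\n", needed);	/* 512 */
	return 0;
}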
8455 struct r5conf *conf = mddev->private; in check_reshape() local
8457 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in check_reshape()
8458 return -EINVAL; in check_reshape()
8459 if (mddev->delta_disks == 0 && in check_reshape()
8460 mddev->new_layout == mddev->layout && in check_reshape()
8461 mddev->new_chunk_sectors == mddev->chunk_sectors) in check_reshape()
8463 if (has_failed(conf)) in check_reshape()
8464 return -EINVAL; in check_reshape()
8465 if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) { in check_reshape()
8472 if (mddev->level == 6) in check_reshape()
8474 if (mddev->raid_disks + mddev->delta_disks < min) in check_reshape()
8475 return -EINVAL; in check_reshape()
8479 return -ENOSPC; in check_reshape()
8481 if (mddev->new_chunk_sectors > mddev->chunk_sectors || in check_reshape()
8482 mddev->delta_disks > 0) in check_reshape()
8483 if (resize_chunks(conf, in check_reshape()
8484 conf->previous_raid_disks in check_reshape()
8485 + max(0, mddev->delta_disks), in check_reshape()
8486 max(mddev->new_chunk_sectors, in check_reshape()
8487 mddev->chunk_sectors) in check_reshape()
8489 return -ENOMEM; in check_reshape()
8491 if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size) in check_reshape()
8493 return resize_stripes(conf, (conf->previous_raid_disks in check_reshape()
8494 + mddev->delta_disks)); in check_reshape()
8499 struct r5conf *conf = mddev->private; in raid5_start_reshape() local
8505 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) in raid5_start_reshape()
8506 return -EBUSY; in raid5_start_reshape()
8509 return -ENOSPC; in raid5_start_reshape()
8511 if (has_failed(conf)) in raid5_start_reshape()
8512 return -EINVAL; in raid5_start_reshape()
8515 if (mddev->recovery_cp < MaxSector) in raid5_start_reshape()
8516 return -EBUSY; in raid5_start_reshape()
8517 for (i = 0; i < conf->raid_disks; i++) in raid5_start_reshape()
8518 if (rdev_mdlock_deref(mddev, conf->disks[i].replacement)) in raid5_start_reshape()
8519 return -EBUSY; in raid5_start_reshape()
8522 if (!test_bit(In_sync, &rdev->flags) in raid5_start_reshape()
8523 && !test_bit(Faulty, &rdev->flags)) in raid5_start_reshape()
8527 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) in raid5_start_reshape()
8531 return -EINVAL; in raid5_start_reshape()
8537 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) in raid5_start_reshape()
8538 < mddev->array_sectors) { in raid5_start_reshape()
8541 return -EINVAL; in raid5_start_reshape()
8544 atomic_set(&conf->reshape_stripes, 0); in raid5_start_reshape()
8545 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8546 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8547 conf->previous_raid_disks = conf->raid_disks; in raid5_start_reshape()
8548 conf->raid_disks += mddev->delta_disks; in raid5_start_reshape()
8549 conf->prev_chunk_sectors = conf->chunk_sectors; in raid5_start_reshape()
8550 conf->chunk_sectors = mddev->new_chunk_sectors; in raid5_start_reshape()
8551 conf->prev_algo = conf->algorithm; in raid5_start_reshape()
8552 conf->algorithm = mddev->new_layout; in raid5_start_reshape()
8553 conf->generation++; in raid5_start_reshape()
8555 * if reshape_progress has been set - so a memory barrier needed. in raid5_start_reshape()
8558 if (mddev->reshape_backwards) in raid5_start_reshape()
8559 conf->reshape_progress = raid5_size(mddev, 0, 0); in raid5_start_reshape()
8561 conf->reshape_progress = 0; in raid5_start_reshape()
8562 conf->reshape_safe = conf->reshape_progress; in raid5_start_reshape()
8563 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8564 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8567 * the reshape wasn't running - like Discard or Read - have in raid5_start_reshape()
8580 if (mddev->delta_disks >= 0) { in raid5_start_reshape()
8582 if (rdev->raid_disk < 0 && in raid5_start_reshape()
8583 !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8585 if (rdev->raid_disk in raid5_start_reshape()
8586 >= conf->previous_raid_disks) in raid5_start_reshape()
8587 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8589 rdev->recovery_offset = 0; in raid5_start_reshape()
8594 } else if (rdev->raid_disk >= conf->previous_raid_disks in raid5_start_reshape()
8595 && !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8597 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8601 * ->degraded is measured against the larger of the in raid5_start_reshape()
8604 spin_lock_irqsave(&conf->device_lock, flags); in raid5_start_reshape()
8605 mddev->degraded = raid5_calc_degraded(conf); in raid5_start_reshape()
8606 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_start_reshape()
8608 mddev->raid_disks = conf->raid_disks; in raid5_start_reshape()
8609 mddev->reshape_position = conf->reshape_progress; in raid5_start_reshape()
8610 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_start_reshape()
8612 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_start_reshape()
8613 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_start_reshape()
8614 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); in raid5_start_reshape()
8615 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_start_reshape()
8616 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_start_reshape()
8617 rcu_assign_pointer(mddev->sync_thread, in raid5_start_reshape()
8619 if (!mddev->sync_thread) { in raid5_start_reshape()
8620 mddev->recovery = 0; in raid5_start_reshape()
8621 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8622 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8623 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; in raid5_start_reshape()
8624 mddev->new_chunk_sectors = in raid5_start_reshape()
8625 conf->chunk_sectors = conf->prev_chunk_sectors; in raid5_start_reshape()
8626 mddev->new_layout = conf->algorithm = conf->prev_algo; in raid5_start_reshape()
8628 rdev->new_data_offset = rdev->data_offset; in raid5_start_reshape()
8630 		conf->generation--; in raid5_start_reshape()
8631 conf->reshape_progress = MaxSector; in raid5_start_reshape()
8632 mddev->reshape_position = MaxSector; in raid5_start_reshape()
8633 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8634 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8635 return -EAGAIN; in raid5_start_reshape()
8637 conf->reshape_checkpoint = jiffies; in raid5_start_reshape()
8638 md_wakeup_thread(mddev->sync_thread); in raid5_start_reshape()
8644 * changes needed in 'conf'
8646 static void end_reshape(struct r5conf *conf) in end_reshape() argument
8649 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { in end_reshape()
8652 spin_lock_irq(&conf->device_lock); in end_reshape()
8653 conf->previous_raid_disks = conf->raid_disks; in end_reshape()
8654 md_finish_reshape(conf->mddev); in end_reshape()
8656 conf->reshape_progress = MaxSector; in end_reshape()
8657 conf->mddev->reshape_position = MaxSector; in end_reshape()
8658 rdev_for_each(rdev, conf->mddev) in end_reshape()
8659 if (rdev->raid_disk >= 0 && in end_reshape()
8660 !test_bit(Journal, &rdev->flags) && in end_reshape()
8661 !test_bit(In_sync, &rdev->flags)) in end_reshape()
8662 rdev->recovery_offset = MaxSector; in end_reshape()
8663 spin_unlock_irq(&conf->device_lock); in end_reshape()
8664 wake_up(&conf->wait_for_overlap); in end_reshape()
8666 if (conf->mddev->queue) in end_reshape()
8667 raid5_set_io_opt(conf); in end_reshape()
8676 struct r5conf *conf = mddev->private; in raid5_finish_reshape() local
8679 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { in raid5_finish_reshape()
8681 if (mddev->delta_disks <= 0) { in raid5_finish_reshape()
8683 spin_lock_irq(&conf->device_lock); in raid5_finish_reshape()
8684 mddev->degraded = raid5_calc_degraded(conf); in raid5_finish_reshape()
8685 spin_unlock_irq(&conf->device_lock); in raid5_finish_reshape()
8686 for (d = conf->raid_disks ; in raid5_finish_reshape()
8687 d < conf->raid_disks - mddev->delta_disks; in raid5_finish_reshape()
8690 conf->disks[d].rdev); in raid5_finish_reshape()
8692 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8694 conf->disks[d].replacement); in raid5_finish_reshape()
8696 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8699 mddev->layout = conf->algorithm; in raid5_finish_reshape()
8700 mddev->chunk_sectors = conf->chunk_sectors; in raid5_finish_reshape()
8701 mddev->reshape_position = MaxSector; in raid5_finish_reshape()
8702 mddev->delta_disks = 0; in raid5_finish_reshape()
8703 mddev->reshape_backwards = 0; in raid5_finish_reshape()
8709 struct r5conf *conf = mddev->private; in raid5_quiesce() local
8713 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8717 r5c_flush_cache(conf, INT_MAX); in raid5_quiesce()
8721 smp_store_release(&conf->quiesce, 2); in raid5_quiesce()
8722 wait_event_cmd(conf->wait_for_quiescent, in raid5_quiesce()
8723 atomic_read(&conf->active_stripes) == 0 && in raid5_quiesce()
8724 atomic_read(&conf->active_aligned_reads) == 0, in raid5_quiesce()
8725 unlock_all_device_hash_locks_irq(conf), in raid5_quiesce()
8726 lock_all_device_hash_locks_irq(conf)); in raid5_quiesce()
8727 conf->quiesce = 1; in raid5_quiesce()
8728 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8730 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8732 /* re-enable writes */ in raid5_quiesce()
8733 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8734 conf->quiesce = 0; in raid5_quiesce()
8735 wake_up(&conf->wait_for_quiescent); in raid5_quiesce()
8736 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8737 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8739 log_quiesce(conf, quiesce); in raid5_quiesce()
8744 struct r0conf *raid0_conf = mddev->private; in raid45_takeover_raid0()
8748 if (raid0_conf->nr_strip_zones > 1) { in raid45_takeover_raid0()
8751 return ERR_PTR(-EINVAL); in raid45_takeover_raid0()
8754 sectors = raid0_conf->strip_zone[0].zone_end; in raid45_takeover_raid0()
8755 sector_div(sectors, raid0_conf->strip_zone[0].nb_dev); in raid45_takeover_raid0()
8756 mddev->dev_sectors = sectors; in raid45_takeover_raid0()
8757 mddev->new_level = level; in raid45_takeover_raid0()
8758 mddev->new_layout = ALGORITHM_PARITY_N; in raid45_takeover_raid0()
8759 mddev->new_chunk_sectors = mddev->chunk_sectors; in raid45_takeover_raid0()
8760 mddev->raid_disks += 1; in raid45_takeover_raid0()
8761 mddev->delta_disks = 1; in raid45_takeover_raid0()
8763 mddev->recovery_cp = MaxSector; in raid45_takeover_raid0()
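/*
 * Illustrative arithmetic for the takeover above (example values only):
 * zone_end counts the sectors of the single RAID0 zone summed over all
 * member devices, so dividing by nb_dev recovers the per-device size
 * that becomes mddev->dev_sectors.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t zone_end = 5860575504ULL;	/* example: 3 x 1953525168 */
	unsigned int nb_dev = 3;

	printf("dev_sectors = %llu\n",
	       (unsigned long long)(zone_end / nb_dev));	/* 1953525168 */
	return 0;
}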
8773 if (mddev->raid_disks != 2 || in raid5_takeover_raid1()
8774 mddev->degraded > 1) in raid5_takeover_raid1()
8775 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8777 /* Should check if there are write-behind devices? */ in raid5_takeover_raid1()
8782 while (chunksect && (mddev->array_sectors & (chunksect-1))) in raid5_takeover_raid1()
8785 if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private)) in raid5_takeover_raid1()
8787 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8789 mddev->new_level = 5; in raid5_takeover_raid1()
8790 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; in raid5_takeover_raid1()
8791 mddev->new_chunk_sectors = chunksect; in raid5_takeover_raid1()
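/*
 * Illustrative userspace sketch of the chunk-size search above.  The
 * starting candidate of 128 sectors is an assumption for the example,
 * not necessarily the kernel's value: the candidate is halved until it
 * evenly divides array_sectors, and rejected if the result ends up
 * smaller than one stripe.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t array_sectors = 1953524928ULL;	/* example RAID1 size */
	unsigned int chunksect = 128;		/* assumed starting candidate */
	unsigned int stripe_size = 4096;	/* example RAID5_STRIPE_SIZE */

	while (chunksect && (array_sectors & (chunksect - 1)))
		chunksect >>= 1;

	if ((uint64_t)chunksect << 9 < stripe_size)
		printf("array size unsuitable for takeover\n");
	else
		printf("new_chunk_sectors = %u\n", chunksect);	/* 64 -> 32 KiB */
	return 0;
}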
8804 switch (mddev->layout) { in raid5_takeover_raid6()
8824 return ERR_PTR(-EINVAL); in raid5_takeover_raid6()
8826 mddev->new_level = 5; in raid5_takeover_raid6()
8827 mddev->new_layout = new_layout; in raid5_takeover_raid6()
8828 mddev->delta_disks = -1; in raid5_takeover_raid6()
8829 mddev->raid_disks -= 1; in raid5_takeover_raid6()
8835 /* For a 2-drive array, the layout and chunk size can be changed in raid5_check_reshape()
8837 * For larger arrays we record the new value - after validation in raid5_check_reshape()
8840 struct r5conf *conf = mddev->private; in raid5_check_reshape() local
8841 int new_chunk = mddev->new_chunk_sectors; in raid5_check_reshape()
8843 if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) in raid5_check_reshape()
8844 return -EINVAL; in raid5_check_reshape()
8847 return -EINVAL; in raid5_check_reshape()
8849 return -EINVAL; in raid5_check_reshape()
8850 if (mddev->array_sectors & (new_chunk-1)) in raid5_check_reshape()
8852 return -EINVAL; in raid5_check_reshape()
8857 if (mddev->raid_disks == 2) { in raid5_check_reshape()
8859 if (mddev->new_layout >= 0) { in raid5_check_reshape()
8860 conf->algorithm = mddev->new_layout; in raid5_check_reshape()
8861 mddev->layout = mddev->new_layout; in raid5_check_reshape()
8864 			conf->chunk_sectors = new_chunk; in raid5_check_reshape()
8865 mddev->chunk_sectors = new_chunk; in raid5_check_reshape()
8867 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_check_reshape()
8868 md_wakeup_thread(mddev->thread); in raid5_check_reshape()
8875 int new_chunk = mddev->new_chunk_sectors; in raid6_check_reshape()
8877 if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) in raid6_check_reshape()
8878 return -EINVAL; in raid6_check_reshape()
8881 return -EINVAL; in raid6_check_reshape()
8883 return -EINVAL; in raid6_check_reshape()
8884 if (mddev->array_sectors & (new_chunk-1)) in raid6_check_reshape()
8886 return -EINVAL; in raid6_check_reshape()
8896 * raid0 - if there is only one strip zone - make it a raid4 layout in raid5_takeover()
8897 * raid1 - if there are two drives. We need to know the chunk size in raid5_takeover()
8898 * raid4 - trivial - just use a raid4 layout. in raid5_takeover()
8899 * raid6 - Providing it is a *_6 layout in raid5_takeover()
8901 if (mddev->level == 0) in raid5_takeover()
8903 if (mddev->level == 1) in raid5_takeover()
8905 if (mddev->level == 4) { in raid5_takeover()
8906 mddev->new_layout = ALGORITHM_PARITY_N; in raid5_takeover()
8907 mddev->new_level = 5; in raid5_takeover()
8910 if (mddev->level == 6) in raid5_takeover()
8913 return ERR_PTR(-EINVAL); in raid5_takeover()
8919 * raid0 - if there is only one strip zone in raid4_takeover()
8920 * raid5 - if layout is right in raid4_takeover()
8922 if (mddev->level == 0) in raid4_takeover()
8924 if (mddev->level == 5 && in raid4_takeover()
8925 mddev->layout == ALGORITHM_PARITY_N) { in raid4_takeover()
8926 mddev->new_layout = 0; in raid4_takeover()
8927 mddev->new_level = 4; in raid4_takeover()
8930 return ERR_PTR(-EINVAL); in raid4_takeover()
8943 if (mddev->pers != &raid5_personality) in raid6_takeover()
8944 return ERR_PTR(-EINVAL); in raid6_takeover()
8945 if (mddev->degraded > 1) in raid6_takeover()
8946 return ERR_PTR(-EINVAL); in raid6_takeover()
8947 if (mddev->raid_disks > 253) in raid6_takeover()
8948 return ERR_PTR(-EINVAL); in raid6_takeover()
8949 if (mddev->raid_disks < 3) in raid6_takeover()
8950 return ERR_PTR(-EINVAL); in raid6_takeover()
8952 switch (mddev->layout) { in raid6_takeover()
8972 return ERR_PTR(-EINVAL); in raid6_takeover()
8974 mddev->new_level = 6; in raid6_takeover()
8975 mddev->new_layout = new_layout; in raid6_takeover()
8976 mddev->delta_disks = 1; in raid6_takeover()
8977 mddev->raid_disks += 1; in raid6_takeover()
8983 struct r5conf *conf; in raid5_change_consistency_policy() local
8989 conf = mddev->private; in raid5_change_consistency_policy()
8990 if (!conf) { in raid5_change_consistency_policy()
8992 return -ENODEV; in raid5_change_consistency_policy()
8997 if (!raid5_has_ppl(conf) && conf->level == 5) { in raid5_change_consistency_policy()
8998 err = log_init(conf, NULL, true); in raid5_change_consistency_policy()
9000 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
9003 log_exit(conf); in raid5_change_consistency_policy()
9008 err = -EINVAL; in raid5_change_consistency_policy()
9010 if (raid5_has_ppl(conf)) { in raid5_change_consistency_policy()
9012 log_exit(conf); in raid5_change_consistency_policy()
9014 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
9015 } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && in raid5_change_consistency_policy()
9016 r5l_log_disk_error(conf)) { in raid5_change_consistency_policy()
9021 if (test_bit(Journal, &rdev->flags)) { in raid5_change_consistency_policy()
9028 clear_bit(MD_HAS_JOURNAL, &mddev->flags); in raid5_change_consistency_policy()
9031 err = -EBUSY; in raid5_change_consistency_policy()
9033 err = -EINVAL; in raid5_change_consistency_policy()
9035 err = -EINVAL; in raid5_change_consistency_policy()
9048 struct r5conf *conf = mddev->private; in raid5_start() local
9050 return r5l_start(conf->log); in raid5_start()
9055 struct r5conf *conf = mddev->private; in raid5_prepare_suspend() local
9057 wait_event(mddev->sb_wait, !reshape_inprogress(mddev) || in raid5_prepare_suspend()
9058 percpu_ref_is_zero(&mddev->active_io)); in raid5_prepare_suspend()
9059 if (percpu_ref_is_zero(&mddev->active_io)) in raid5_prepare_suspend()
9066 wake_up(&conf->wait_for_overlap); in raid5_prepare_suspend()
9153 return -ENOMEM; in raid5_init()
9182 MODULE_ALIAS("md-personality-4"); /* RAID5 */
9183 MODULE_ALIAS("md-raid5");
9184 MODULE_ALIAS("md-raid4");
9185 MODULE_ALIAS("md-level-5");
9186 MODULE_ALIAS("md-level-4");
9187 MODULE_ALIAS("md-personality-8"); /* RAID6 */
9188 MODULE_ALIAS("md-raid6");
9189 MODULE_ALIAS("md-level-6");