/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);



/* defined here:
   drbd_md_io_complete
   drbd_endio_sec
   drbd_endio_pri

 * more endio handlers:
   atodb_endio in drbd_actlog.c
   drbd_bm_async_io_complete in drbd_bitmap.c

 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the sync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;
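
/* A usage sketch of that pattern (illustrative only; whether the callers use
 * the plain or the irq-disabling lock variants is not shown here):
 *
 *	read_lock(&global_state_lock);	while changing the state of one device
 *	write_lock(&global_state_lock);	while evaluating sync-after dependencies
 */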

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;

	md_io = (struct drbd_md_io *)bio->bi_private;
	md_io->error = error;

	complete(&md_io->event);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = e->mdev;

	D_ASSERT(e->block_id != ID_VACANT);

	spin_lock_irqsave(&mdev->req_lock, flags);
	mdev->read_cnt += e->size >> 9;
	list_del(&e->w.list);
	if (list_empty(&mdev->read_ee))
		wake_up(&mdev->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &e->flags))
		__drbd_chk_io_error(mdev, FALSE);
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	drbd_queue_work(&mdev->data.work, &e->w);
	put_ldev(mdev);
}

static int is_failed_barrier(int ee_flags)
{
	return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED))
			== (EE_IS_BARRIER|EE_WAS_ERROR);
}
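
/* Example: an entry with EE_IS_BARRIER and EE_WAS_ERROR set but
 * EE_RESUBMITTED still clear matches the pattern above, so it qualifies for
 * reissue; once EE_RESUBMITTED is set it no longer matches, which makes the
 * barrier downgrade below a one-shot per entry. */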

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.
 */
static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = e->mdev;
	sector_t e_sector;
	int do_wake;
	int is_syncer_req;
	int do_al_complete_io;

	/* if this is a failed barrier request, disable use of barriers,
	 * and schedule for resubmission */
	if (is_failed_barrier(e->flags)) {
		drbd_bump_write_ordering(mdev, WO_bdev_flush);
		spin_lock_irqsave(&mdev->req_lock, flags);
		list_del(&e->w.list);
		e->flags = (e->flags & ~EE_WAS_ERROR) | EE_RESUBMITTED;
		e->w.cb = w_e_reissue;
		/* put_ldev actually happens below, once we come here again. */
		__release(local);
		spin_unlock_irqrestore(&mdev->req_lock, flags);
		drbd_queue_work(&mdev->data.work, &e->w);
		return;
	}

	D_ASSERT(e->block_id != ID_VACANT);

	/* after we moved e to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	e_sector = e->sector;
	do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO;
	is_syncer_req = is_syncer_block_id(e->block_id);

	spin_lock_irqsave(&mdev->req_lock, flags);
	mdev->writ_cnt += e->size >> 9;
	list_del(&e->w.list); /* has been on active_ee or sync_ee */
	list_add_tail(&e->w.list, &mdev->done_ee);

	/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
	 * neither did we wake possibly waiting conflicting requests.
	 * done from "drbd_process_done_ee" within the appropriate w.cb
	 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */

	do_wake = is_syncer_req
		? list_empty(&mdev->sync_ee)
		: list_empty(&mdev->active_ee);

	if (test_bit(__EE_WAS_ERROR, &e->flags))
		__drbd_chk_io_error(mdev, FALSE);
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	if (is_syncer_req)
		drbd_rs_complete_io(mdev, e_sector);

	if (do_wake)
		wake_up(&mdev->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(mdev, e_sector);

	wake_asender(mdev);
	put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_endio_sec(struct bio *bio, int error)
{
	struct drbd_epoch_entry *e = bio->bi_private;
	struct drbd_conf *mdev = e->mdev;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error)
		dev_warn(DEV, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)e->sector);
	if (!error && !uptodate) {
		dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
				is_write ? "write" : "read",
				(unsigned long long)e->sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &e->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&e->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(e);
		else
			drbd_endio_read_sec_final(e);
	}
}
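
/* One epoch entry may have been split into several bios on submission
 * (multi-bio EEs); e->pending_bios counts them, so only the completion of
 * the last bio runs one of the *_sec_final() handlers above. */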
"write" : "read"); 226b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 227b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 228b411b363SPhilipp Reisner * but do not return any error?! */ 229b411b363SPhilipp Reisner error = -EIO; 230b411b363SPhilipp Reisner } 231b411b363SPhilipp Reisner 232b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 233b411b363SPhilipp Reisner if (unlikely(error)) { 234b411b363SPhilipp Reisner what = (bio_data_dir(bio) == WRITE) 235b411b363SPhilipp Reisner ? write_completed_with_error 2365c3c7e64SLars Ellenberg : (bio_rw(bio) == READ) 237b411b363SPhilipp Reisner ? read_completed_with_error 238b411b363SPhilipp Reisner : read_ahead_completed_with_error; 239b411b363SPhilipp Reisner } else 240b411b363SPhilipp Reisner what = completed_ok; 241b411b363SPhilipp Reisner 242b411b363SPhilipp Reisner bio_put(req->private_bio); 243b411b363SPhilipp Reisner req->private_bio = ERR_PTR(error); 244b411b363SPhilipp Reisner 2450f0601f4SLars Ellenberg req_mod(req, what); 246b411b363SPhilipp Reisner } 247b411b363SPhilipp Reisner 248b411b363SPhilipp Reisner int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 249b411b363SPhilipp Reisner { 250b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 251b411b363SPhilipp Reisner 252b411b363SPhilipp Reisner /* We should not detach for read io-error, 253b411b363SPhilipp Reisner * but try to WRITE the P_DATA_REPLY to the failed location, 254b411b363SPhilipp Reisner * to give the disk the chance to relocate that block */ 255b411b363SPhilipp Reisner 256b411b363SPhilipp Reisner spin_lock_irq(&mdev->req_lock); 257d255e5ffSLars Ellenberg if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { 258d255e5ffSLars Ellenberg _req_mod(req, read_retry_remote_canceled); 259b411b363SPhilipp Reisner spin_unlock_irq(&mdev->req_lock); 260b411b363SPhilipp Reisner return 1; 261b411b363SPhilipp Reisner } 262b411b363SPhilipp Reisner spin_unlock_irq(&mdev->req_lock); 263b411b363SPhilipp Reisner 264b411b363SPhilipp Reisner return w_send_read_req(mdev, w, 0); 265b411b363SPhilipp Reisner } 266b411b363SPhilipp Reisner 267b411b363SPhilipp Reisner int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 268b411b363SPhilipp Reisner { 269b411b363SPhilipp Reisner ERR_IF(cancel) return 1; 270b411b363SPhilipp Reisner dev_err(DEV, "resync inactive, but callback triggered??\n"); 271b411b363SPhilipp Reisner return 1; /* Simply ignore this! 

void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = e->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = e->size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec *bvec;
	int i;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	__bio_for_each_segment(bvec, bio, i, 0) {
		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}
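
/* Both helpers above stream the payload chunk-wise through one hash
 * transformation, re-pointing a single scatterlist entry for each chunk:
 * page by page for an epoch entry, segment by segment for a bio.  The
 * resulting digest therefore covers the complete payload. */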

static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
	int digest_size;
	void *digest;
	int ok;

	D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);

	if (unlikely(cancel)) {
		drbd_free_ee(mdev, e);
		return 1;
	}

	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_hash_digestsize(mdev->csums_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);

			inc_rs_pending(mdev);
			ok = drbd_send_drequest_csum(mdev,
						     e->sector,
						     e->size,
						     digest,
						     digest_size,
						     P_CSUM_RS_REQUEST);
			kfree(digest);
		} else {
			dev_err(DEV, "kmalloc() of digest failed.\n");
			ok = 0;
		}
	} else
		ok = 1;

	drbd_free_ee(mdev, e);

	if (unlikely(!ok))
		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
	return ok;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
	struct drbd_epoch_entry *e;

	if (!get_ldev(mdev))
		return -EIO;

	if (drbd_rs_should_slow_down(mdev))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
	if (!e)
		goto defer;

	e->w.cb = w_e_send_csum;
	spin_lock_irq(&mdev->req_lock);
	list_add(&e->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->req_lock);

	atomic_add(size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	drbd_free_ee(mdev, e);
defer:
	put_ldev(mdev);
	return -EAGAIN;
}
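
/* Resync pacing: the timer below re-arms itself every SLEEP_TIME (100ms,
 * assuming the usual SLEEP_TIME of HZ/10) and merely queues the
 * state-dependent callback; the requests themselves are then generated from
 * worker context by w_make_resync_request or w_make_ov_request. */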
void resync_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;
	int queue;

	queue = 1;
	switch (mdev->state.conn) {
	case C_VERIFY_S:
		mdev->resync_work.cb = w_make_ov_request;
		break;
	case C_SYNC_TARGET:
		mdev->resync_work.cb = w_make_resync_request;
		break;
	default:
		queue = 0;
		mdev->resync_work.cb = w_resync_inactive;
	}

	/* harmless race: list_empty outside data.work.q_lock */
	if (list_empty(&mdev->resync_work.list) && queue)
		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}
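
/* The plan-ahead fifo holds one slot per future SLEEP_TIME step:
 * fifo_add_val() spreads a correction evenly over all planned steps, and
 * fifo_push() retires the oldest slot (the correction that becomes due in
 * the current step) while inserting a fresh value for the farthest-out
 * step. */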

int drbd_rs_controller(struct drbd_conf *mdev)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy */
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;

	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
	mdev->rs_in_flight -= sect_in;

	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */

	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - mdev->rs_in_flight - mdev->rs_planed;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(&mdev->rs_plan_s, cps);
	mdev->rs_planed += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(&mdev->rs_plan_s, 0);
	spin_unlock(&mdev->peer_seq_lock);
	mdev->rs_planed -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, mdev->rs_in_flight, want, correction,
		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}
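
/* Worked example with made-up numbers, fifo assumed all-zero beforehand:
 * steps = 10, c_fill_target = 1000 sectors, rs_in_flight = 600,
 * rs_planed = 0, sect_in = 200.  Then correction = 1000 - 600 - 0 = 400,
 * cps = 40 is added to each of the 10 slots (rs_planed becomes 400), and
 * fifo_push() retires the oldest slot: curr_corr = 40, rs_planed drops to
 * 360, and this turn requests req_sect = 200 + 40 = 240 sectors. */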

int w_make_resync_request(struct drbd_conf *mdev,
		struct drbd_work *w, int cancel)
{
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	int max_segment_size;
	int number, rollback_i, size, pe, mx;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 1;

	if (unlikely(mdev->state.conn < C_CONNECTED)) {
		dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
		return 0;
	}

	if (mdev->state.conn != C_SYNC_TARGET)
		dev_err(DEV, "%s in w_make_resync_request\n",
			drbd_conn_str(mdev->state.conn));

	if (mdev->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(mdev);
		return 1;
	}

	if (!get_ldev(mdev)) {
		/* Since we only need to access mdev->rsync,
		   get_ldev_if_state(mdev, D_FAILED) would be sufficient; but
		   continuing the resync with a broken disk makes no sense at
		   all */
		dev_err(DEV, "Disk broke down during resync!\n");
		mdev->resync_work.cb = w_resync_inactive;
		return 1;
	}

	/* starting with drbd 8.3.8, we can handle multi-bio EEs,
	 * if it should be necessary */
	max_segment_size =
		mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
		mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;

	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		mdev->c_sync_rate = mdev->sync_conf.rate;
		number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
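
	/* Unit check for the fixed-rate branch, with illustrative numbers:
	 * c_sync_rate = 4000 KB/s, BM_BLOCK_SIZE = 4096, SLEEP_TIME = HZ/10
	 * gives number = (HZ/10) * 4000 / (4 * HZ) = 100 bitmap blocks per
	 * timer tick, i.e. 100 * 4K * 10/s = 4000 KB/s. */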

	/* Throttle resync on lower level disk activity, which may also be
	 * caused by application IO on Primary/SyncTarget.
	 * Keep this after the call to drbd_rs_controller, as that assumes
	 * to be called as precisely as possible every SLEEP_TIME,
	 * and would be confused otherwise. */
	if (drbd_rs_should_slow_down(mdev))
		goto requeue;

	mutex_lock(&mdev->data.mutex);
	if (mdev->data.socket)
		mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
	else
		mx = 1;
	mutex_unlock(&mdev->data.mutex);

	/* For resync rates >160MB/sec, allow more pending RS requests */
	if (number > mx)
		mx = number;

	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
	pe = atomic_read(&mdev->rs_pending_cnt);
	if ((pe + number) > mx)
		number = mx - pe;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests, when half of the send buffer is filled */
		mutex_lock(&mdev->data.mutex);
		if (mdev->data.socket) {
			queued = mdev->data.socket->sk->sk_wmem_queued;
			sndbuf = mdev->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&mdev->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);

		if (bit == -1UL) {
			mdev->bm_resync_fo = drbd_bm_bits(mdev);
			mdev->resync_work.cb = w_resync_inactive;
			put_ldev(mdev);
			return 1;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(mdev, sector)) {
			mdev->bm_resync_fo = bit;
			goto requeue;
		}
		mdev->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
			drbd_rs_complete_io(mdev, sector);
			goto next_sector;
		}
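
		/* The block below merges adjacent dirty bits into one larger
		 * request: starting from a single 4K bit, the request grows
		 * while the next bit is dirty, the size stays within
		 * max_segment_size and the start sector keeps the (doubling)
		 * alignment, e.g. 4K -> 8K -> 16K, stopping at the first
		 * clean bit or resync extent boundary. */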

#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		for (;;) {
			if (size + BM_BLOCK_SIZE > max_segment_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(mdev, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			mdev->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
			switch (read_for_csum(mdev, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(mdev);
				return 0;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(mdev, sector);
				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			inc_rs_pending(mdev);
			if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
					       sector, size, ID_SYNCER)) {
				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(mdev);
				put_ldev(mdev);
				return 0;
			}
		}
	}

	if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		mdev->resync_work.cb = w_resync_inactive;
		put_ldev(mdev);
		return 1;
	}

 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(mdev);
	return 1;
}

static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);

	if (unlikely(cancel))
		return 1;

	if (unlikely(mdev->state.conn < C_CONNECTED)) {
		dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
		return 0;
	}

	number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
	if (atomic_read(&mdev->rs_pending_cnt) > number)
		goto requeue;

	number -= atomic_read(&mdev->rs_pending_cnt);

	sector = mdev->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity) {
			mdev->resync_work.cb = w_resync_inactive;
			return 1;
		}

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(mdev, sector)) {
			mdev->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(mdev);
		if (!drbd_send_ov_request(mdev, sector, size)) {
			dec_rs_pending(mdev);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	mdev->ov_position = sector;

 requeue:
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}
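
/* Note that online verify requests are paced by the configured sync rate
 * alone; the dynamic controller (drbd_rs_controller) only throttles actual
 * resync in w_make_resync_request above. */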

int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	kfree(w);
	ov_oos_print(mdev);
	drbd_resync_finished(mdev);

	return 1;
}

static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	kfree(w);

	drbd_resync_finished(mdev);

	return 1;
}

static void ping_peer(struct drbd_conf *mdev)
{
	clear_bit(GOT_PING_ACK, &mdev->flags);
	request_ping(mdev);
	wait_event(mdev->misc_wait,
		   test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
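
/* drbd_resync_finished() pings the peer first, presumably to make sure it is
 * still reachable and has processed everything it sent us before we settle
 * the final resync state and UUIDs below. */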

int drbd_resync_finished(struct drbd_conf *mdev)
{
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_work *w;
	char *khelper_cmd = NULL;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(mdev)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		drbd_kick_lo(mdev);
		__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ / 10);
		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
		if (w) {
			w->cb = w_resync_finished;
			drbd_queue_work(&mdev->data.work, w);
			return 1;
		}
		dev_err(DEV, "Both drbd_rs_del_all() and kmalloc(w) failed.\n");
	}

	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;
	db = mdev->rs_total;
	dbdt = Bit2KB(db/dt);
	mdev->rs_paused /= HZ;

	if (!get_ldev(mdev))
		goto out;

	ping_peer(mdev);

	spin_lock_irq(&mdev->req_lock);
	os = mdev->state;

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
	     "Online verify " : "Resync",
	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(mdev);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			dev_alert(DEV, "Online verify found %lu %dk blocks out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT((n_oos - mdev->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (mdev->csums_tfm && mdev->rs_total) {
			const unsigned long s = mdev->rs_same_csum;
			const unsigned long t = mdev->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
				(t < 100000) ? ((s*100)/t) : (s/(t/100));
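			/* integer percentage without overflow: for small
			 * totals (t < 100000), s*100 still fits an unsigned
			 * long even on 32 bit; for larger totals, divide t
			 * first and accept the small rounding error. */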
			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(mdev->rs_same_csum),
			     Bit2KB(mdev->rs_total - mdev->rs_same_csum),
			     Bit2KB(mdev->rs_total));
		}
	}

	if (mdev->rs_failed) {
		dev_info(DEV, "            %lu failed blocks\n", mdev->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (mdev->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
				drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
			} else {
				dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
			}
		}

		drbd_uuid_set_bm(mdev, 0UL);

		if (mdev->p_uuid) {
			/* Now the two UUID sets are equal, update what we
			 * know of the peer. */
			int i;
			for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
				mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
		}
	}

	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&mdev->req_lock);
	put_ldev(mdev);
out:
	mdev->rs_total  = 0;
	mdev->rs_failed = 0;
	mdev->rs_paused = 0;
	mdev->ov_start_sector = 0;

	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
		dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
	}

	if (khelper_cmd)
		drbd_khelper(mdev, khelper_cmd);

	return 1;
}

/* helper */
static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
{
	if (drbd_ee_has_active_page(e)) {
		/* This might happen if sendpage() has not finished */
		int i = (e->size + PAGE_SIZE - 1) >> PAGE_SHIFT;
		atomic_add(i, &mdev->pp_in_use_by_net);
		atomic_sub(i, &mdev->pp_in_use);
		spin_lock_irq(&mdev->req_lock);
		list_add_tail(&e->w.list, &mdev->net_ee);
		spin_unlock_irq(&mdev->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_ee(mdev, e);
}
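
/* Rationale for net_ee: drbd_send_block() may hand pages to the network
 * stack via sendpage(), so the network layer can still hold references when
 * the send call returns; such entries are parked on net_ee (accounted in
 * pp_in_use_by_net) instead of being freed right away. */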

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @mdev:	DRBD device.
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 */
int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
	int ok;

	if (unlikely(cancel)) {
		drbd_free_ee(mdev, e);
		dec_unacked(mdev);
		return 1;
	}

	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
		ok = drbd_send_block(mdev, P_DATA_REPLY, e);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)e->sector);

		ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
	}

	dec_unacked(mdev);

	move_to_net_ee_or_free(mdev, e);

	if (unlikely(!ok))
		dev_err(DEV, "drbd_send_block() failed\n");
	return ok;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @mdev:	DRBD device.
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 */
int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
	int ok;

	if (unlikely(cancel)) {
		drbd_free_ee(mdev, e);
		dec_unacked(mdev);
		return 1;
	}

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, e->sector);
		put_ldev(mdev);
	}

	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(mdev);
			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
		} else {
			if (__ratelimit(&drbd_ratelimit_state))
				dev_err(DEV, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			ok = 1;
		}
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)e->sector);

		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);

		/* update resync data with failure */
		drbd_rs_failed_io(mdev, e->sector, e->size);
	}

	dec_unacked(mdev);

	move_to_net_ee_or_free(mdev, e);

	if (unlikely(!ok))
		dev_err(DEV, "drbd_send_block() failed\n");
	return ok;
}
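
/* Checksum based resync, receiving side: the peer sent only the digest of a
 * block it suspects to be out of sync (P_CSUM_RS_REQUEST).  If the digest of
 * the local block matches, a cheap P_RS_IS_IN_SYNC ack suffices; only on a
 * mismatch is the full block shipped as P_RS_DATA_REPLY. */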

int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int ok, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_ee(mdev, e);
		dec_unacked(mdev);
		return 1;
	}

	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, e->sector);
		put_ldev(mdev);
	}

	di = e->digest;

	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (mdev->csums_tfm) {
			digest_size = crypto_hash_digestsize(mdev->csums_tfm);
			D_ASSERT(digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		if (digest) {
			drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(mdev, e->sector, e->size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
			ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
		} else {
			inc_rs_pending(mdev);
			e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
			kfree(di);
			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
		}
	} else {
		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(mdev);
	move_to_net_ee_or_free(mdev, e);

	if (unlikely(!ok))
		dev_err(DEV, "drbd_send_block/ack() failed\n");
	return ok;
}
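
/* Online verify: w_e_end_ov_req answers a P_OV_REQUEST by sending back the
 * digest of the local block (P_OV_REPLY); w_e_end_ov_reply below compares
 * the peer's digest with the local data and records mismatches via
 * drbd_ov_oos_found(). */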
1095b411b363SPhilipp Reisner int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1096b411b363SPhilipp Reisner {
1097b411b363SPhilipp Reisner 	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
1098b411b363SPhilipp Reisner 	int digest_size;
1099b411b363SPhilipp Reisner 	void *digest;
1100b411b363SPhilipp Reisner 	int ok = 1;
1101b411b363SPhilipp Reisner 
1102b411b363SPhilipp Reisner 	if (unlikely(cancel))
1103b411b363SPhilipp Reisner 		goto out;
1104b411b363SPhilipp Reisner 
110545bb912bSLars Ellenberg 	if (unlikely((e->flags & EE_WAS_ERROR) != 0))
1106b411b363SPhilipp Reisner 		goto out;
1107b411b363SPhilipp Reisner 
1108b411b363SPhilipp Reisner 	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
1109b411b363SPhilipp Reisner 	/* FIXME if this allocation fails, online verify will not terminate! */
1110b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
1111b411b363SPhilipp Reisner 	if (digest) {
111245bb912bSLars Ellenberg 		drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
1113b411b363SPhilipp Reisner 		inc_rs_pending(mdev);
1114b411b363SPhilipp Reisner 		ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
1115b411b363SPhilipp Reisner 					     digest, digest_size, P_OV_REPLY);
1116b411b363SPhilipp Reisner 		if (!ok)
1117b411b363SPhilipp Reisner 			dec_rs_pending(mdev);
1118b411b363SPhilipp Reisner 		kfree(digest);
1119b411b363SPhilipp Reisner 	}
1120b411b363SPhilipp Reisner 
1121b411b363SPhilipp Reisner out:
1122b411b363SPhilipp Reisner 	drbd_free_ee(mdev, e);
1123b411b363SPhilipp Reisner 
1124b411b363SPhilipp Reisner 	dec_unacked(mdev);
1125b411b363SPhilipp Reisner 
1126b411b363SPhilipp Reisner 	return ok;
1127b411b363SPhilipp Reisner }
1128b411b363SPhilipp Reisner 
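/*
 * Editor's note (not part of the original source): in online verify the
 * requesting side ships no data at all, only the digest of its local block,
 * in a P_OV_REPLY packet. The peer compares digests in w_e_end_ov_reply()
 * below and answers with P_OV_RESULT. The FIXME above is real: if kmalloc()
 * fails, no P_OV_REPLY is sent for this block and the verify run cannot
 * make progress past it.
 */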
1129b411b363SPhilipp Reisner void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
1130b411b363SPhilipp Reisner {
1131b411b363SPhilipp Reisner 	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1132b411b363SPhilipp Reisner 		mdev->ov_last_oos_size += size>>9;
1133b411b363SPhilipp Reisner 	} else {
1134b411b363SPhilipp Reisner 		mdev->ov_last_oos_start = sector;
1135b411b363SPhilipp Reisner 		mdev->ov_last_oos_size = size>>9;
1136b411b363SPhilipp Reisner 	}
1137b411b363SPhilipp Reisner 	drbd_set_out_of_sync(mdev, sector, size);
1138b411b363SPhilipp Reisner 	set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
1139b411b363SPhilipp Reisner }
1140b411b363SPhilipp Reisner 
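/*
 * Editor's note (not part of the original source): drbd_ov_oos_found()
 * coalesces adjacent out-of-sync blocks into a single run for reporting,
 * keyed on "previous start + previous size == new start" (all counted in
 * 512-byte sectors, hence size>>9). Example: a 4096-byte block at sector
 * 1000 (8 sectors) followed by one at sector 1008 grows one run
 * [1000, +16) instead of producing two separate entries.
 */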
1141b411b363SPhilipp Reisner int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1142b411b363SPhilipp Reisner {
1143b411b363SPhilipp Reisner 	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
1144b411b363SPhilipp Reisner 	struct digest_info *di;
1145b411b363SPhilipp Reisner 	int digest_size;
1146b411b363SPhilipp Reisner 	void *digest;
1147b411b363SPhilipp Reisner 	int ok, eq = 0;
1148b411b363SPhilipp Reisner 
1149b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1150b411b363SPhilipp Reisner 		drbd_free_ee(mdev, e);
1151b411b363SPhilipp Reisner 		dec_unacked(mdev);
1152b411b363SPhilipp Reisner 		return 1;
1153b411b363SPhilipp Reisner 	}
1154b411b363SPhilipp Reisner 
1155b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1156b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
11571d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
1158b411b363SPhilipp Reisner 		drbd_rs_complete_io(mdev, e->sector);
11591d53f09eSLars Ellenberg 		put_ldev(mdev);
11601d53f09eSLars Ellenberg 	}
1161b411b363SPhilipp Reisner 
116285719573SPhilipp Reisner 	di = e->digest;
1163b411b363SPhilipp Reisner 
116445bb912bSLars Ellenberg 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
1165b411b363SPhilipp Reisner 		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
1166b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1167b411b363SPhilipp Reisner 		if (digest) {
116845bb912bSLars Ellenberg 			drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
1169b411b363SPhilipp Reisner 
1170b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1171b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1172b411b363SPhilipp Reisner 			kfree(digest);
1173b411b363SPhilipp Reisner 		}
1174b411b363SPhilipp Reisner 	} else {
1175b411b363SPhilipp Reisner 		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
1176b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1177b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1178b411b363SPhilipp Reisner 	}
1179b411b363SPhilipp Reisner 
1180b411b363SPhilipp Reisner 	dec_unacked(mdev);
1181b411b363SPhilipp Reisner 	if (!eq)
1182b411b363SPhilipp Reisner 		drbd_ov_oos_found(mdev, e->sector, e->size);
1183b411b363SPhilipp Reisner 	else
1184b411b363SPhilipp Reisner 		ov_oos_print(mdev);
1185b411b363SPhilipp Reisner 
1186b411b363SPhilipp Reisner 	ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
1187b411b363SPhilipp Reisner 			      eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1188b411b363SPhilipp Reisner 
1189b411b363SPhilipp Reisner 	drbd_free_ee(mdev, e);
1190b411b363SPhilipp Reisner 
1191b411b363SPhilipp Reisner 	if (--mdev->ov_left == 0) {
1192b411b363SPhilipp Reisner 		ov_oos_print(mdev);
1193b411b363SPhilipp Reisner 		drbd_resync_finished(mdev);
1194b411b363SPhilipp Reisner 	}
1195b411b363SPhilipp Reisner 
1196b411b363SPhilipp Reisner 	return ok;
1197b411b363SPhilipp Reisner }
1198b411b363SPhilipp Reisner 
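/*
 * Editor's note (not part of the original source): mdev->ov_left counts
 * the verify blocks still outstanding; it is decremented once per reply
 * handled here, and the final reply both flushes the last out-of-sync run
 * (ov_oos_print) and ends the whole verify via drbd_resync_finished().
 */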
1199b411b363SPhilipp Reisner int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1200b411b363SPhilipp Reisner {
1201b411b363SPhilipp Reisner 	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
1202b411b363SPhilipp Reisner 	complete(&b->done);
1203b411b363SPhilipp Reisner 	return 1;
1204b411b363SPhilipp Reisner }
1205b411b363SPhilipp Reisner 
1206b411b363SPhilipp Reisner int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1207b411b363SPhilipp Reisner {
1208b411b363SPhilipp Reisner 	struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
1209b411b363SPhilipp Reisner 	struct p_barrier *p = &mdev->data.sbuf.barrier;
1210b411b363SPhilipp Reisner 	int ok = 1;
1211b411b363SPhilipp Reisner 
1212b411b363SPhilipp Reisner 	/* really avoid racing with tl_clear.  w.cb may have been referenced
1213b411b363SPhilipp Reisner 	 * just before it was reassigned and re-queued, so double check that.
1214b411b363SPhilipp Reisner 	 * actually, this race was harmless, since we only try to send the
1215b411b363SPhilipp Reisner 	 * barrier packet here, and otherwise do nothing with the object.
1216b411b363SPhilipp Reisner 	 * but compare with the head of w_clear_epoch */
1217b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->req_lock);
1218b411b363SPhilipp Reisner 	if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1219b411b363SPhilipp Reisner 		cancel = 1;
1220b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->req_lock);
1221b411b363SPhilipp Reisner 	if (cancel)
1222b411b363SPhilipp Reisner 		return 1;
1223b411b363SPhilipp Reisner 
1224b411b363SPhilipp Reisner 	if (!drbd_get_data_sock(mdev))
1225b411b363SPhilipp Reisner 		return 0;
1226b411b363SPhilipp Reisner 	p->barrier = b->br_number;
1227b411b363SPhilipp Reisner 	/* inc_ap_pending was done where this was queued.
1228b411b363SPhilipp Reisner 	 * dec_ap_pending will be done in got_BarrierAck
1229b411b363SPhilipp Reisner 	 * or (on connection loss) in w_clear_epoch. */
1230b411b363SPhilipp Reisner 	ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
12310b70a13dSPhilipp Reisner 			(struct p_header80 *)p, sizeof(*p), 0);
1232b411b363SPhilipp Reisner 	drbd_put_data_sock(mdev);
1233b411b363SPhilipp Reisner 
1234b411b363SPhilipp Reisner 	return ok;
1235b411b363SPhilipp Reisner }
1236b411b363SPhilipp Reisner 
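/*
 * Editor's note (not part of the original source): P_BARRIER separates
 * write epochs on the wire. The packet carries only the epoch number
 * (b->br_number); the peer answers with a barrier ack (see the
 * got_BarrierAck reference above), which is what lets the sender retire
 * the completed epoch from its transfer log.
 */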
1237b411b363SPhilipp Reisner int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1238b411b363SPhilipp Reisner {
1239b411b363SPhilipp Reisner 	if (cancel)
1240b411b363SPhilipp Reisner 		return 1;
1241b411b363SPhilipp Reisner 	return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
1242b411b363SPhilipp Reisner }
1243b411b363SPhilipp Reisner 
1244b411b363SPhilipp Reisner /**
1245b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1246b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1247b411b363SPhilipp Reisner  * @w:		work object.
1248b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1249b411b363SPhilipp Reisner  */
1250b411b363SPhilipp Reisner int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1251b411b363SPhilipp Reisner {
1252b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1253b411b363SPhilipp Reisner 	int ok;
1254b411b363SPhilipp Reisner 
1255b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1256b411b363SPhilipp Reisner 		req_mod(req, send_canceled);
1257b411b363SPhilipp Reisner 		return 1;
1258b411b363SPhilipp Reisner 	}
1259b411b363SPhilipp Reisner 
1260b411b363SPhilipp Reisner 	ok = drbd_send_dblock(mdev, req);
1261b411b363SPhilipp Reisner 	req_mod(req, ok ? handed_over_to_network : send_failed);
1262b411b363SPhilipp Reisner 
1263b411b363SPhilipp Reisner 	return ok;
1264b411b363SPhilipp Reisner }
1265b411b363SPhilipp Reisner 
1266b411b363SPhilipp Reisner /**
1267b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1268b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1269b411b363SPhilipp Reisner  * @w:		work object.
1270b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1271b411b363SPhilipp Reisner  */
1272b411b363SPhilipp Reisner int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1273b411b363SPhilipp Reisner {
1274b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1275b411b363SPhilipp Reisner 	int ok;
1276b411b363SPhilipp Reisner 
1277b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1278b411b363SPhilipp Reisner 		req_mod(req, send_canceled);
1279b411b363SPhilipp Reisner 		return 1;
1280b411b363SPhilipp Reisner 	}
1281b411b363SPhilipp Reisner 
1282b411b363SPhilipp Reisner 	ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size,
1283b411b363SPhilipp Reisner 				(unsigned long)req);
1284b411b363SPhilipp Reisner 
1285b411b363SPhilipp Reisner 	if (!ok) {
1286b411b363SPhilipp Reisner 		/* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
1287b411b363SPhilipp Reisner 		 * so this is probably redundant */
1288b411b363SPhilipp Reisner 		if (mdev->state.conn >= C_CONNECTED)
1289b411b363SPhilipp Reisner 			drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
1290b411b363SPhilipp Reisner 	}
1291b411b363SPhilipp Reisner 	req_mod(req, ok ? handed_over_to_network : send_failed);
1292b411b363SPhilipp Reisner 
1293b411b363SPhilipp Reisner 	return ok;
1294b411b363SPhilipp Reisner }
1295b411b363SPhilipp Reisner 
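/*
 * Editor's note (not part of the original source): both data callbacks
 * above (w_send_dblock, w_send_read_req) funnel their outcome into the
 * request state machine via req_mod(): send_canceled when the connection
 * is going away, send_failed when the socket write failed, and
 * handed_over_to_network once the packet is on the wire. The state
 * machine, not the worker, decides what happens to the request next
 * (see drbd_req.c).
 */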
1296265be2d0SPhilipp Reisner int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1297265be2d0SPhilipp Reisner {
1298265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1299265be2d0SPhilipp Reisner 
13000778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1301265be2d0SPhilipp Reisner 		drbd_al_begin_io(mdev, req->sector);
1302265be2d0SPhilipp Reisner 	/* Calling drbd_al_begin_io() out of the worker might deadlock
1303265be2d0SPhilipp Reisner 	   in theory. In practice it cannot deadlock, since this is
1304265be2d0SPhilipp Reisner 	   only used when unfreezing IOs. All the extents of the requests
1305265be2d0SPhilipp Reisner 	   that made it into the TL are already active */
1306265be2d0SPhilipp Reisner 
1307265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1308265be2d0SPhilipp Reisner 	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1309265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1310265be2d0SPhilipp Reisner 
1311265be2d0SPhilipp Reisner 	return 1;
1312265be2d0SPhilipp Reisner }
1313265be2d0SPhilipp Reisner 
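/*
 * Editor's note (not part of the original source): this callback re-issues
 * a request to the local backing device after IO was frozen and then
 * resumed: it attaches a fresh private bio to the surviving master_bio,
 * points it at the backing bdev, and submits it via generic_make_request().
 */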
1314b411b363SPhilipp Reisner static int _drbd_may_sync_now(struct drbd_conf *mdev)
1315b411b363SPhilipp Reisner {
1316b411b363SPhilipp Reisner 	struct drbd_conf *odev = mdev;
1317b411b363SPhilipp Reisner 
1318b411b363SPhilipp Reisner 	while (1) {
1319b411b363SPhilipp Reisner 		if (odev->sync_conf.after == -1)
1320b411b363SPhilipp Reisner 			return 1;
1321b411b363SPhilipp Reisner 		odev = minor_to_mdev(odev->sync_conf.after);
1322b411b363SPhilipp Reisner 		ERR_IF(!odev) return 1;
1323b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1324b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1325b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1326b411b363SPhilipp Reisner 		    odev->state.user_isp)
1327b411b363SPhilipp Reisner 			return 0;
1328b411b363SPhilipp Reisner 	}
1329b411b363SPhilipp Reisner }
1330b411b363SPhilipp Reisner 
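/*
 * Editor's note (not part of the original source): _drbd_may_sync_now()
 * walks the sync-after dependency chain towards its root. With a
 * hypothetical setup where minor 2 syncs after minor 1, and minor 1 after
 * minor 0:
 *
 *	2 -> 1 -> 0 -> (after == -1)
 *
 * minor 2 may only resync while neither 1 nor 0 is resyncing or paused
 * (any of aftr_isp/peer_isp/user_isp set).
 */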
1331b411b363SPhilipp Reisner /**
1332b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1333b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1334b411b363SPhilipp Reisner  *
1335b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1336b411b363SPhilipp Reisner  */
1337b411b363SPhilipp Reisner static int _drbd_pause_after(struct drbd_conf *mdev)
1338b411b363SPhilipp Reisner {
1339b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1340b411b363SPhilipp Reisner 	int i, rv = 0;
1341b411b363SPhilipp Reisner 
1342b411b363SPhilipp Reisner 	for (i = 0; i < minor_count; i++) {
1343b411b363SPhilipp Reisner 		odev = minor_to_mdev(i);
1344b411b363SPhilipp Reisner 		if (!odev)
1345b411b363SPhilipp Reisner 			continue;
1346b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1347b411b363SPhilipp Reisner 			continue;
1348b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1349b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1350b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1351b411b363SPhilipp Reisner 	}
1352b411b363SPhilipp Reisner 
1353b411b363SPhilipp Reisner 	return rv;
1354b411b363SPhilipp Reisner }
1355b411b363SPhilipp Reisner 
1356b411b363SPhilipp Reisner /**
1357b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1358b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1359b411b363SPhilipp Reisner  *
1360b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1361b411b363SPhilipp Reisner  */
1362b411b363SPhilipp Reisner static int _drbd_resume_next(struct drbd_conf *mdev)
1363b411b363SPhilipp Reisner {
1364b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1365b411b363SPhilipp Reisner 	int i, rv = 0;
1366b411b363SPhilipp Reisner 
1367b411b363SPhilipp Reisner 	for (i = 0; i < minor_count; i++) {
1368b411b363SPhilipp Reisner 		odev = minor_to_mdev(i);
1369b411b363SPhilipp Reisner 		if (!odev)
1370b411b363SPhilipp Reisner 			continue;
1371b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1372b411b363SPhilipp Reisner 			continue;
1373b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1374b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1375b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1376b411b363SPhilipp Reisner 							CS_HARD, NULL)
1377b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO);
1378b411b363SPhilipp Reisner 		}
1379b411b363SPhilipp Reisner 	}
1380b411b363SPhilipp Reisner 	return rv;
1381b411b363SPhilipp Reisner }
1382b411b363SPhilipp Reisner 
1383b411b363SPhilipp Reisner void resume_next_sg(struct drbd_conf *mdev)
1384b411b363SPhilipp Reisner {
1385b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1386b411b363SPhilipp Reisner 	_drbd_resume_next(mdev);
1387b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1388b411b363SPhilipp Reisner }
1389b411b363SPhilipp Reisner 
1390b411b363SPhilipp Reisner void suspend_other_sg(struct drbd_conf *mdev)
1391b411b363SPhilipp Reisner {
1392b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1393b411b363SPhilipp Reisner 	_drbd_pause_after(mdev);
1394b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1395b411b363SPhilipp Reisner }
1396b411b363SPhilipp Reisner 
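/*
 * Editor's note (not part of the original source): the aftr_isp bit set and
 * cleared above is the "paused because of a sync-after dependency" flag.
 * It is derived state: both helpers simply recompute it from
 * _drbd_may_sync_now() for every configured minor, so it tracks whatever
 * the dependency graph and the members' resync states currently imply.
 */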
1397b411b363SPhilipp Reisner static int sync_after_error(struct drbd_conf *mdev, int o_minor)
1398b411b363SPhilipp Reisner {
1399b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1400b411b363SPhilipp Reisner 
1401b411b363SPhilipp Reisner 	if (o_minor == -1)
1402b411b363SPhilipp Reisner 		return NO_ERROR;
1403b411b363SPhilipp Reisner 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
1404b411b363SPhilipp Reisner 		return ERR_SYNC_AFTER;
1405b411b363SPhilipp Reisner 
1406b411b363SPhilipp Reisner 	/* check for loops */
1407b411b363SPhilipp Reisner 	odev = minor_to_mdev(o_minor);
1408b411b363SPhilipp Reisner 	while (1) {
1409b411b363SPhilipp Reisner 		if (odev == mdev)
1410b411b363SPhilipp Reisner 			return ERR_SYNC_AFTER_CYCLE;
1411b411b363SPhilipp Reisner 
1412b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
1413b411b363SPhilipp Reisner 		if (odev->sync_conf.after == -1)
1414b411b363SPhilipp Reisner 			return NO_ERROR;
1415b411b363SPhilipp Reisner 
1416b411b363SPhilipp Reisner 		/* follow the dependency chain */
1417b411b363SPhilipp Reisner 		odev = minor_to_mdev(odev->sync_conf.after);
1418b411b363SPhilipp Reisner 	}
1419b411b363SPhilipp Reisner }
1420b411b363SPhilipp Reisner 
1421b411b363SPhilipp Reisner int drbd_alter_sa(struct drbd_conf *mdev, int na)
1422b411b363SPhilipp Reisner {
1423b411b363SPhilipp Reisner 	int changes;
1424b411b363SPhilipp Reisner 	int retcode;
1425b411b363SPhilipp Reisner 
1426b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1427b411b363SPhilipp Reisner 	retcode = sync_after_error(mdev, na);
1428b411b363SPhilipp Reisner 	if (retcode == NO_ERROR) {
1429b411b363SPhilipp Reisner 		mdev->sync_conf.after = na;
1430b411b363SPhilipp Reisner 		do {
1431b411b363SPhilipp Reisner 			changes  = _drbd_pause_after(mdev);
1432b411b363SPhilipp Reisner 			changes |= _drbd_resume_next(mdev);
1433b411b363SPhilipp Reisner 		} while (changes);
1434b411b363SPhilipp Reisner 	}
1435b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1436b411b363SPhilipp Reisner 	return retcode;
1437b411b363SPhilipp Reisner }
1438b411b363SPhilipp Reisner 
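/*
 * Editor's note (not part of the original source): sync_after_error()
 * rejects an "after" setting that would close a cycle: starting from the
 * proposed dependency it follows the chain, and reaching mdev itself means
 * the new edge would point back at us. drbd_alter_sa() then re-runs
 * pause/resume until a fixpoint is reached, since unpausing one device can
 * cascade through the chain.
 */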
1439b411b363SPhilipp Reisner /**
1440b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1441b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1442b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1443b411b363SPhilipp Reisner  *
1444b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1445b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1446b411b363SPhilipp Reisner  */
1447b411b363SPhilipp Reisner void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1448b411b363SPhilipp Reisner {
1449b411b363SPhilipp Reisner 	union drbd_state ns;
1450b411b363SPhilipp Reisner 	int r;
1451b411b363SPhilipp Reisner 
1452b411b363SPhilipp Reisner 	if (mdev->state.conn >= C_SYNC_SOURCE) {
1453b411b363SPhilipp Reisner 		dev_err(DEV, "Resync already running!\n");
1454b411b363SPhilipp Reisner 		return;
1455b411b363SPhilipp Reisner 	}
1456b411b363SPhilipp Reisner 
1457b411b363SPhilipp Reisner 	/* In case a previous resync run was aborted by an IO error/detach on the peer. */
1458b411b363SPhilipp Reisner 	drbd_rs_cancel_all(mdev);
1459b411b363SPhilipp Reisner 
1460b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET) {
1461b411b363SPhilipp Reisner 		/* Since application IO was locked out during C_WF_BITMAP_T and
1462b411b363SPhilipp Reisner 		   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET,
1463b411b363SPhilipp Reisner 		   ask the before-resync-target handler whether we may make the data inconsistent. */
1464b411b363SPhilipp Reisner 		r = drbd_khelper(mdev, "before-resync-target");
1465b411b363SPhilipp Reisner 		r = (r >> 8) & 0xff;
1466b411b363SPhilipp Reisner 		if (r > 0) {
1467b411b363SPhilipp Reisner 			dev_info(DEV, "before-resync-target handler returned %d, "
1468b411b363SPhilipp Reisner 				 "dropping connection.\n", r);
1469b411b363SPhilipp Reisner 			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
1470b411b363SPhilipp Reisner 			return;
1471b411b363SPhilipp Reisner 		}
1472b411b363SPhilipp Reisner 	}
1473b411b363SPhilipp Reisner 
1474b411b363SPhilipp Reisner 	drbd_state_lock(mdev);
1475b411b363SPhilipp Reisner 
1476b411b363SPhilipp Reisner 	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
1477b411b363SPhilipp Reisner 		drbd_state_unlock(mdev);
1478b411b363SPhilipp Reisner 		return;
1479b411b363SPhilipp Reisner 	}
1480b411b363SPhilipp Reisner 
1481b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET) {
1482b411b363SPhilipp Reisner 		mdev->bm_resync_fo = 0;
1483b411b363SPhilipp Reisner 	} else /* side == C_SYNC_SOURCE */ {
1484b411b363SPhilipp Reisner 		u64 uuid;
1485b411b363SPhilipp Reisner 
1486b411b363SPhilipp Reisner 		get_random_bytes(&uuid, sizeof(u64));
1487b411b363SPhilipp Reisner 		drbd_uuid_set(mdev, UI_BITMAP, uuid);
1488b411b363SPhilipp Reisner 		drbd_send_sync_uuid(mdev, uuid);
1489b411b363SPhilipp Reisner 
1490b411b363SPhilipp Reisner 		D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
1491b411b363SPhilipp Reisner 	}
1492b411b363SPhilipp Reisner 
1493b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1494b411b363SPhilipp Reisner 	ns = mdev->state;
1495b411b363SPhilipp Reisner 
1496b411b363SPhilipp Reisner 	ns.aftr_isp = !_drbd_may_sync_now(mdev);
1497b411b363SPhilipp Reisner 
1498b411b363SPhilipp Reisner 	ns.conn = side;
1499b411b363SPhilipp Reisner 
1500b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1501b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1502b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1503b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1504b411b363SPhilipp Reisner 
1505b411b363SPhilipp Reisner 	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
1506b411b363SPhilipp Reisner 	ns = mdev->state;
1507b411b363SPhilipp Reisner 
1508b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1509b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1510b411b363SPhilipp Reisner 
1511b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
15121d7734a0SLars Ellenberg 		unsigned long tw = drbd_bm_total_weight(mdev);
15131d7734a0SLars Ellenberg 		unsigned long now = jiffies;
15141d7734a0SLars Ellenberg 		int i;
15151d7734a0SLars Ellenberg 
1516b411b363SPhilipp Reisner 		mdev->rs_failed    = 0;
1517b411b363SPhilipp Reisner 		mdev->rs_paused    = 0;
1518b411b363SPhilipp Reisner 		mdev->rs_same_csum = 0;
15190f0601f4SLars Ellenberg 		mdev->rs_last_events = 0;
15200f0601f4SLars Ellenberg 		mdev->rs_last_sect_ev = 0;
15211d7734a0SLars Ellenberg 		mdev->rs_total     = tw;
15221d7734a0SLars Ellenberg 		mdev->rs_start     = now;
15231d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
15241d7734a0SLars Ellenberg 			mdev->rs_mark_left[i] = tw;
15251d7734a0SLars Ellenberg 			mdev->rs_mark_time[i] = now;
15261d7734a0SLars Ellenberg 		}
1527b411b363SPhilipp Reisner 		_drbd_pause_after(mdev);
1528b411b363SPhilipp Reisner 	}
1529b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1530b411b363SPhilipp Reisner 	put_ldev(mdev);
1531b411b363SPhilipp Reisner 
1532b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1533b411b363SPhilipp Reisner 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1534b411b363SPhilipp Reisner 		     drbd_conn_str(ns.conn),
1535b411b363SPhilipp Reisner 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1536b411b363SPhilipp Reisner 		     (unsigned long) mdev->rs_total);
1537b411b363SPhilipp Reisner 
1538af85e8e8SLars Ellenberg 		if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
1539af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1540af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1541af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1542af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1543af85e8e8SLars Ellenberg 			 * introduces a protocol change. Sleeping for some
1544af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1545af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1546af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1547af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1548af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
1549af85e8e8SLars Ellenberg 			if (side == C_SYNC_SOURCE)
1550af85e8e8SLars Ellenberg 				schedule_timeout_interruptible(
1551af85e8e8SLars Ellenberg 					mdev->net_conf->ping_int * HZ +
1552af85e8e8SLars Ellenberg 					mdev->net_conf->ping_timeo*HZ/9);
1553b411b363SPhilipp Reisner 			drbd_resync_finished(mdev);
1554b411b363SPhilipp Reisner 		}
1555b411b363SPhilipp Reisner 
1556778f271dSPhilipp Reisner 		atomic_set(&mdev->rs_sect_in, 0);
15570f0601f4SLars Ellenberg 		atomic_set(&mdev->rs_sect_ev, 0);
1558778f271dSPhilipp Reisner 		mdev->rs_in_flight = 0;
1559778f271dSPhilipp Reisner 		mdev->rs_planed = 0;
1560778f271dSPhilipp Reisner 		spin_lock(&mdev->peer_seq_lock);
1561778f271dSPhilipp Reisner 		fifo_set(&mdev->rs_plan_s, 0);
1562778f271dSPhilipp Reisner 		spin_unlock(&mdev->peer_seq_lock);
1563b411b363SPhilipp Reisner 		/* ns.conn may already be != mdev->state.conn,
1564b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1565b411b363SPhilipp Reisner 		 * the timer triggers.
1566b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1567b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1568b411b363SPhilipp Reisner 			mod_timer(&mdev->resync_timer, jiffies);
1569b411b363SPhilipp Reisner 
1570b411b363SPhilipp Reisner 		drbd_md_sync(mdev);
1571b411b363SPhilipp Reisner 	}
1572d0c3f60fSPhilipp Reisner 	drbd_state_unlock(mdev);
1573b411b363SPhilipp Reisner }
1574b411b363SPhilipp Reisner 
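/*
 * Editor's note (not part of the original source): on success the counters
 * initialized above feed the progress and speed estimates: rs_total is the
 * bitmap weight in bits of BM_BLOCK_SIZE granularity (hence the
 * "<< (BM_BLOCK_SHIFT-10)" conversion to KB in the log message), and the
 * rs_mark_left/rs_mark_time arrays form the sliding window from which the
 * displayed sync rate is computed.
 */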
1575b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
1576b411b363SPhilipp Reisner {
1577b411b363SPhilipp Reisner 	struct drbd_conf *mdev = thi->mdev;
1578b411b363SPhilipp Reisner 	struct drbd_work *w = NULL;
1579b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
1580b411b363SPhilipp Reisner 	int intr = 0, i;
1581b411b363SPhilipp Reisner 
1582b411b363SPhilipp Reisner 	sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev));
1583b411b363SPhilipp Reisner 
1584b411b363SPhilipp Reisner 	while (get_t_state(thi) == Running) {
1585b411b363SPhilipp Reisner 		drbd_thread_current_set_cpu(mdev);
1586b411b363SPhilipp Reisner 
1587b411b363SPhilipp Reisner 		if (down_trylock(&mdev->data.work.s)) {
1588b411b363SPhilipp Reisner 			mutex_lock(&mdev->data.mutex);
1589b411b363SPhilipp Reisner 			if (mdev->data.socket && !mdev->net_conf->no_cork)
1590b411b363SPhilipp Reisner 				drbd_tcp_uncork(mdev->data.socket);
1591b411b363SPhilipp Reisner 			mutex_unlock(&mdev->data.mutex);
1592b411b363SPhilipp Reisner 
1593b411b363SPhilipp Reisner 			intr = down_interruptible(&mdev->data.work.s);
1594b411b363SPhilipp Reisner 
1595b411b363SPhilipp Reisner 			mutex_lock(&mdev->data.mutex);
1596b411b363SPhilipp Reisner 			if (mdev->data.socket && !mdev->net_conf->no_cork)
1597b411b363SPhilipp Reisner 				drbd_tcp_cork(mdev->data.socket);
1598b411b363SPhilipp Reisner 			mutex_unlock(&mdev->data.mutex);
1599b411b363SPhilipp Reisner 		}
1600b411b363SPhilipp Reisner 
1601b411b363SPhilipp Reisner 		if (intr) {
1602b411b363SPhilipp Reisner 			D_ASSERT(intr == -EINTR);
1603b411b363SPhilipp Reisner 			flush_signals(current);
1604b411b363SPhilipp Reisner 			ERR_IF (get_t_state(thi) == Running)
1605b411b363SPhilipp Reisner 				continue;
1606b411b363SPhilipp Reisner 			break;
1607b411b363SPhilipp Reisner 		}
1608b411b363SPhilipp Reisner 
1609b411b363SPhilipp Reisner 		if (get_t_state(thi) != Running)
1610b411b363SPhilipp Reisner 			break;
1611b411b363SPhilipp Reisner 		/* With this break, we have done a down() but not consumed
1612b411b363SPhilipp Reisner 		   the entry from the list. The cleanup code takes care of
1613b411b363SPhilipp Reisner 		   this... */
1614b411b363SPhilipp Reisner 
1615b411b363SPhilipp Reisner 		w = NULL;
1616b411b363SPhilipp Reisner 		spin_lock_irq(&mdev->data.work.q_lock);
1617b411b363SPhilipp Reisner 		ERR_IF(list_empty(&mdev->data.work.q)) {
1618b411b363SPhilipp Reisner 			/* something terribly wrong in our logic.
1619b411b363SPhilipp Reisner 			 * we were able to down() the semaphore,
1620b411b363SPhilipp Reisner 			 * but the list is empty... doh.
1621b411b363SPhilipp Reisner 			 *
1622b411b363SPhilipp Reisner 			 * what is the best thing to do now?
1623b411b363SPhilipp Reisner 			 * try again from scratch, restarting the receiver,
1624b411b363SPhilipp Reisner 			 * asender, whatnot? could break even more ugly,
1625b411b363SPhilipp Reisner 			 * e.g. when we are primary, but no good local data.
1626b411b363SPhilipp Reisner 			 *
1627b411b363SPhilipp Reisner 			 * I'll try to get away just starting over this loop.
1628b411b363SPhilipp Reisner 			 */
1629b411b363SPhilipp Reisner 			spin_unlock_irq(&mdev->data.work.q_lock);
1630b411b363SPhilipp Reisner 			continue;
1631b411b363SPhilipp Reisner 		}
1632b411b363SPhilipp Reisner 		w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
1633b411b363SPhilipp Reisner 		list_del_init(&w->list);
1634b411b363SPhilipp Reisner 		spin_unlock_irq(&mdev->data.work.q_lock);
1635b411b363SPhilipp Reisner 
1636b411b363SPhilipp Reisner 		if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) {
1637b411b363SPhilipp Reisner 			/* dev_warn(DEV, "worker: a callback failed! \n"); */
1638b411b363SPhilipp Reisner 			if (mdev->state.conn >= C_CONNECTED)
1639b411b363SPhilipp Reisner 				drbd_force_state(mdev,
1640b411b363SPhilipp Reisner 						NS(conn, C_NETWORK_FAILURE));
1641b411b363SPhilipp Reisner 		}
1642b411b363SPhilipp Reisner 	}
1643b411b363SPhilipp Reisner 	D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
1644b411b363SPhilipp Reisner 	D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
1645b411b363SPhilipp Reisner 
1646b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->data.work.q_lock);
1647b411b363SPhilipp Reisner 	i = 0;
1648b411b363SPhilipp Reisner 	while (!list_empty(&mdev->data.work.q)) {
1649b411b363SPhilipp Reisner 		list_splice_init(&mdev->data.work.q, &work_list);
1650b411b363SPhilipp Reisner 		spin_unlock_irq(&mdev->data.work.q_lock);
1651b411b363SPhilipp Reisner 
1652b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
1653b411b363SPhilipp Reisner 			w = list_entry(work_list.next, struct drbd_work, list);
1654b411b363SPhilipp Reisner 			list_del_init(&w->list);
1655b411b363SPhilipp Reisner 			w->cb(mdev, w, 1);
1656b411b363SPhilipp Reisner 			i++; /* dead debugging code */
1657b411b363SPhilipp Reisner 		}
1658b411b363SPhilipp Reisner 
1659b411b363SPhilipp Reisner 		spin_lock_irq(&mdev->data.work.q_lock);
1660b411b363SPhilipp Reisner 	}
1661b411b363SPhilipp Reisner 	sema_init(&mdev->data.work.s, 0);
1662b411b363SPhilipp Reisner 	/* DANGEROUS race: if someone queued work while holding the spinlock,
1663b411b363SPhilipp Reisner 	 * but did the up() outside of it, we could get an up() on the
1664b411b363SPhilipp Reisner 	 * semaphore without a corresponding list entry.
1665b411b363SPhilipp Reisner 	 * So don't do that.
1666b411b363SPhilipp Reisner 	 */
1667b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->data.work.q_lock);
1668b411b363SPhilipp Reisner 
1669b411b363SPhilipp Reisner 	D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
1670b411b363SPhilipp Reisner 	/* _drbd_set_state only uses stop_nowait.
1671b411b363SPhilipp Reisner 	 * wait here for the Exiting receiver. */
1672b411b363SPhilipp Reisner 	drbd_thread_stop(&mdev->receiver);
1673b411b363SPhilipp Reisner 	drbd_mdev_cleanup(mdev);
1674b411b363SPhilipp Reisner 
1675b411b363SPhilipp Reisner 	dev_info(DEV, "worker terminated\n");
1676b411b363SPhilipp Reisner 
1677b411b363SPhilipp Reisner 	clear_bit(DEVICE_DYING, &mdev->flags);
1678b411b363SPhilipp Reisner 	clear_bit(CONFIG_PENDING, &mdev->flags);
1679b411b363SPhilipp Reisner 	wake_up(&mdev->state_wait);
1680b411b363SPhilipp Reisner 
1681b411b363SPhilipp Reisner 	return 0;
1682b411b363SPhilipp Reisner }
1683
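/*
 * Editor's note (not part of the original source): the cork/uncork dance
 * in drbd_worker()'s main loop batches consecutive work-queue packets into
 * fewer TCP segments. The data socket stays corked while work keeps
 * arriving (down_trylock() succeeds) and is uncorked only when the worker
 * is about to block on the semaphore, flushing whatever was queued so far;
 * after waking up it is corked again before the next batch is processed.
 */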