1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_worker.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23b411b363SPhilipp Reisner 24b411b363SPhilipp Reisner */ 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner #include <linux/drbd.h> 28b411b363SPhilipp Reisner #include <linux/sched.h> 29b411b363SPhilipp Reisner #include <linux/wait.h> 30b411b363SPhilipp Reisner #include <linux/mm.h> 31b411b363SPhilipp Reisner #include <linux/memcontrol.h> 32b411b363SPhilipp Reisner #include <linux/mm_inline.h> 33b411b363SPhilipp Reisner #include <linux/slab.h> 34b411b363SPhilipp Reisner #include <linux/random.h> 35b411b363SPhilipp Reisner #include <linux/string.h> 36b411b363SPhilipp Reisner #include <linux/scatterlist.h> 37b411b363SPhilipp Reisner 38b411b363SPhilipp Reisner #include "drbd_int.h" 39b411b363SPhilipp Reisner #include "drbd_req.h" 40b411b363SPhilipp Reisner 41b411b363SPhilipp Reisner static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); 429d77a5feSPhilipp Reisner static int w_make_resync_request(struct drbd_conf *mdev, 439d77a5feSPhilipp Reisner struct drbd_work *w, int cancel); 44b411b363SPhilipp Reisner 45b411b363SPhilipp Reisner 46b411b363SPhilipp Reisner 47c5a91619SAndreas Gruenbacher /* endio handlers: 48c5a91619SAndreas Gruenbacher * drbd_md_io_complete (defined here) 49c5a91619SAndreas Gruenbacher * drbd_endio_pri (defined here) 50c5a91619SAndreas Gruenbacher * drbd_endio_sec (defined here) 51c5a91619SAndreas Gruenbacher * bm_async_io_complete (defined in drbd_bitmap.c) 52c5a91619SAndreas Gruenbacher * 53b411b363SPhilipp Reisner * For all these callbacks, note the following: 54b411b363SPhilipp Reisner * The callbacks will be called in irq context by the IDE drivers, 55b411b363SPhilipp Reisner * and in Softirqs/Tasklets/BH context by the SCSI drivers. 56b411b363SPhilipp Reisner * Try to get the locking right :) 57b411b363SPhilipp Reisner * 58b411b363SPhilipp Reisner */ 59b411b363SPhilipp Reisner 60b411b363SPhilipp Reisner 61b411b363SPhilipp Reisner /* About the global_state_lock 62b411b363SPhilipp Reisner Each state transition on an device holds a read lock. In case we have 63b411b363SPhilipp Reisner to evaluate the sync after dependencies, we grab a write lock, because 64b411b363SPhilipp Reisner we need stable states on all devices for that. */ 65b411b363SPhilipp Reisner rwlock_t global_state_lock; 66b411b363SPhilipp Reisner 67b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO 68b411b363SPhilipp Reisner * submitted by drbd_md_sync_page_io() 69b411b363SPhilipp Reisner */ 70b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error) 71b411b363SPhilipp Reisner { 72b411b363SPhilipp Reisner struct drbd_md_io *md_io; 73b411b363SPhilipp Reisner 74b411b363SPhilipp Reisner md_io = (struct drbd_md_io *)bio->bi_private; 75b411b363SPhilipp Reisner md_io->error = error; 76b411b363SPhilipp Reisner 77b411b363SPhilipp Reisner complete(&md_io->event); 78b411b363SPhilipp Reisner } 79b411b363SPhilipp Reisner 80b411b363SPhilipp Reisner /* reads on behalf of the partner, 81b411b363SPhilipp Reisner * "submitted" by the receiver 82b411b363SPhilipp Reisner */ 8345bb912bSLars Ellenberg void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) 84b411b363SPhilipp Reisner { 85b411b363SPhilipp Reisner unsigned long flags = 0; 8645bb912bSLars Ellenberg struct drbd_conf *mdev = e->mdev; 87b411b363SPhilipp Reisner 8887eeee41SPhilipp Reisner spin_lock_irqsave(&mdev->tconn->req_lock, flags); 89010f6e67SAndreas Gruenbacher mdev->read_cnt += e->i.size >> 9; 90b411b363SPhilipp Reisner list_del(&e->w.list); 91b411b363SPhilipp Reisner if (list_empty(&mdev->read_ee)) 92b411b363SPhilipp Reisner wake_up(&mdev->ee_wait); 9345bb912bSLars Ellenberg if (test_bit(__EE_WAS_ERROR, &e->flags)) 9481e84650SAndreas Gruenbacher __drbd_chk_io_error(mdev, false); 9587eeee41SPhilipp Reisner spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); 96b411b363SPhilipp Reisner 97e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, &e->w); 98b411b363SPhilipp Reisner put_ldev(mdev); 99b411b363SPhilipp Reisner } 100b411b363SPhilipp Reisner 101b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes, 10245bb912bSLars Ellenberg * "submitted" by the receiver, final stage. */ 10345bb912bSLars Ellenberg static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local) 104b411b363SPhilipp Reisner { 105b411b363SPhilipp Reisner unsigned long flags = 0; 10645bb912bSLars Ellenberg struct drbd_conf *mdev = e->mdev; 107b411b363SPhilipp Reisner sector_t e_sector; 108b411b363SPhilipp Reisner int do_wake; 109579b57edSAndreas Gruenbacher u64 block_id; 110b411b363SPhilipp Reisner int do_al_complete_io; 111b411b363SPhilipp Reisner 112b411b363SPhilipp Reisner /* after we moved e to done_ee, 113b411b363SPhilipp Reisner * we may no longer access it, 114b411b363SPhilipp Reisner * it may be freed/reused already! 115b411b363SPhilipp Reisner * (as soon as we release the req_lock) */ 116010f6e67SAndreas Gruenbacher e_sector = e->i.sector; 117b411b363SPhilipp Reisner do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; 118579b57edSAndreas Gruenbacher block_id = e->block_id; 119b411b363SPhilipp Reisner 12087eeee41SPhilipp Reisner spin_lock_irqsave(&mdev->tconn->req_lock, flags); 121010f6e67SAndreas Gruenbacher mdev->writ_cnt += e->i.size >> 9; 122b411b363SPhilipp Reisner list_del(&e->w.list); /* has been on active_ee or sync_ee */ 123b411b363SPhilipp Reisner list_add_tail(&e->w.list, &mdev->done_ee); 124b411b363SPhilipp Reisner 125bb3bfe96SAndreas Gruenbacher /* 1265e472264SAndreas Gruenbacher * Do not remove from the write_requests tree here: we did not send the 127bb3bfe96SAndreas Gruenbacher * Ack yet and did not wake possibly waiting conflicting requests. 128bb3bfe96SAndreas Gruenbacher * Removed from the tree from "drbd_process_done_ee" within the 129bb3bfe96SAndreas Gruenbacher * appropriate w.cb (e_end_block/e_end_resync_block) or from 130bb3bfe96SAndreas Gruenbacher * _drbd_clear_done_ee. 131bb3bfe96SAndreas Gruenbacher */ 132b411b363SPhilipp Reisner 133579b57edSAndreas Gruenbacher do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); 134b411b363SPhilipp Reisner 13545bb912bSLars Ellenberg if (test_bit(__EE_WAS_ERROR, &e->flags)) 13681e84650SAndreas Gruenbacher __drbd_chk_io_error(mdev, false); 13787eeee41SPhilipp Reisner spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); 138b411b363SPhilipp Reisner 139579b57edSAndreas Gruenbacher if (block_id == ID_SYNCER) 140b411b363SPhilipp Reisner drbd_rs_complete_io(mdev, e_sector); 141b411b363SPhilipp Reisner 142b411b363SPhilipp Reisner if (do_wake) 143b411b363SPhilipp Reisner wake_up(&mdev->ee_wait); 144b411b363SPhilipp Reisner 145b411b363SPhilipp Reisner if (do_al_complete_io) 146b411b363SPhilipp Reisner drbd_al_complete_io(mdev, e_sector); 147b411b363SPhilipp Reisner 148b411b363SPhilipp Reisner wake_asender(mdev); 149b411b363SPhilipp Reisner put_ldev(mdev); 15045bb912bSLars Ellenberg } 151b411b363SPhilipp Reisner 15245bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes, 15345bb912bSLars Ellenberg * "submitted" by the receiver. 15445bb912bSLars Ellenberg */ 15545bb912bSLars Ellenberg void drbd_endio_sec(struct bio *bio, int error) 15645bb912bSLars Ellenberg { 15745bb912bSLars Ellenberg struct drbd_epoch_entry *e = bio->bi_private; 15845bb912bSLars Ellenberg struct drbd_conf *mdev = e->mdev; 15945bb912bSLars Ellenberg int uptodate = bio_flagged(bio, BIO_UPTODATE); 16045bb912bSLars Ellenberg int is_write = bio_data_dir(bio) == WRITE; 16145bb912bSLars Ellenberg 16207194272SLars Ellenberg if (error && __ratelimit(&drbd_ratelimit_state)) 16345bb912bSLars Ellenberg dev_warn(DEV, "%s: error=%d s=%llus\n", 16445bb912bSLars Ellenberg is_write ? "write" : "read", error, 165010f6e67SAndreas Gruenbacher (unsigned long long)e->i.sector); 16645bb912bSLars Ellenberg if (!error && !uptodate) { 16707194272SLars Ellenberg if (__ratelimit(&drbd_ratelimit_state)) 16845bb912bSLars Ellenberg dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", 16945bb912bSLars Ellenberg is_write ? "write" : "read", 170010f6e67SAndreas Gruenbacher (unsigned long long)e->i.sector); 17145bb912bSLars Ellenberg /* strange behavior of some lower level drivers... 17245bb912bSLars Ellenberg * fail the request by clearing the uptodate flag, 17345bb912bSLars Ellenberg * but do not return any error?! */ 17445bb912bSLars Ellenberg error = -EIO; 17545bb912bSLars Ellenberg } 17645bb912bSLars Ellenberg 17745bb912bSLars Ellenberg if (error) 17845bb912bSLars Ellenberg set_bit(__EE_WAS_ERROR, &e->flags); 17945bb912bSLars Ellenberg 18045bb912bSLars Ellenberg bio_put(bio); /* no need for the bio anymore */ 18145bb912bSLars Ellenberg if (atomic_dec_and_test(&e->pending_bios)) { 18245bb912bSLars Ellenberg if (is_write) 18345bb912bSLars Ellenberg drbd_endio_write_sec_final(e); 18445bb912bSLars Ellenberg else 18545bb912bSLars Ellenberg drbd_endio_read_sec_final(e); 18645bb912bSLars Ellenberg } 187b411b363SPhilipp Reisner } 188b411b363SPhilipp Reisner 189b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request 190b411b363SPhilipp Reisner */ 191b411b363SPhilipp Reisner void drbd_endio_pri(struct bio *bio, int error) 192b411b363SPhilipp Reisner { 193a115413dSLars Ellenberg unsigned long flags; 194b411b363SPhilipp Reisner struct drbd_request *req = bio->bi_private; 195b411b363SPhilipp Reisner struct drbd_conf *mdev = req->mdev; 196a115413dSLars Ellenberg struct bio_and_error m; 197b411b363SPhilipp Reisner enum drbd_req_event what; 198b411b363SPhilipp Reisner int uptodate = bio_flagged(bio, BIO_UPTODATE); 199b411b363SPhilipp Reisner 200b411b363SPhilipp Reisner if (!error && !uptodate) { 201b411b363SPhilipp Reisner dev_warn(DEV, "p %s: setting error to -EIO\n", 202b411b363SPhilipp Reisner bio_data_dir(bio) == WRITE ? "write" : "read"); 203b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 204b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 205b411b363SPhilipp Reisner * but do not return any error?! */ 206b411b363SPhilipp Reisner error = -EIO; 207b411b363SPhilipp Reisner } 208b411b363SPhilipp Reisner 209b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 210b411b363SPhilipp Reisner if (unlikely(error)) { 211b411b363SPhilipp Reisner what = (bio_data_dir(bio) == WRITE) 2128554df1cSAndreas Gruenbacher ? WRITE_COMPLETED_WITH_ERROR 2135c3c7e64SLars Ellenberg : (bio_rw(bio) == READ) 2148554df1cSAndreas Gruenbacher ? READ_COMPLETED_WITH_ERROR 2158554df1cSAndreas Gruenbacher : READ_AHEAD_COMPLETED_WITH_ERROR; 216b411b363SPhilipp Reisner } else 2178554df1cSAndreas Gruenbacher what = COMPLETED_OK; 218b411b363SPhilipp Reisner 219b411b363SPhilipp Reisner bio_put(req->private_bio); 220b411b363SPhilipp Reisner req->private_bio = ERR_PTR(error); 221b411b363SPhilipp Reisner 222a115413dSLars Ellenberg /* not req_mod(), we need irqsave here! */ 22387eeee41SPhilipp Reisner spin_lock_irqsave(&mdev->tconn->req_lock, flags); 224a115413dSLars Ellenberg __req_mod(req, what, &m); 22587eeee41SPhilipp Reisner spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); 226a115413dSLars Ellenberg 227a115413dSLars Ellenberg if (m.bio) 228a115413dSLars Ellenberg complete_master_bio(mdev, &m); 229b411b363SPhilipp Reisner } 230b411b363SPhilipp Reisner 231b411b363SPhilipp Reisner int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 232b411b363SPhilipp Reisner { 233b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 234b411b363SPhilipp Reisner 235b411b363SPhilipp Reisner /* We should not detach for read io-error, 236b411b363SPhilipp Reisner * but try to WRITE the P_DATA_REPLY to the failed location, 237b411b363SPhilipp Reisner * to give the disk the chance to relocate that block */ 238b411b363SPhilipp Reisner 23987eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 240d255e5ffSLars Ellenberg if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { 2418554df1cSAndreas Gruenbacher _req_mod(req, READ_RETRY_REMOTE_CANCELED); 24287eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 243b411b363SPhilipp Reisner return 1; 244b411b363SPhilipp Reisner } 24587eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 246b411b363SPhilipp Reisner 247b411b363SPhilipp Reisner return w_send_read_req(mdev, w, 0); 248b411b363SPhilipp Reisner } 249b411b363SPhilipp Reisner 25045bb912bSLars Ellenberg void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) 25145bb912bSLars Ellenberg { 25245bb912bSLars Ellenberg struct hash_desc desc; 25345bb912bSLars Ellenberg struct scatterlist sg; 25445bb912bSLars Ellenberg struct page *page = e->pages; 25545bb912bSLars Ellenberg struct page *tmp; 25645bb912bSLars Ellenberg unsigned len; 25745bb912bSLars Ellenberg 25845bb912bSLars Ellenberg desc.tfm = tfm; 25945bb912bSLars Ellenberg desc.flags = 0; 26045bb912bSLars Ellenberg 26145bb912bSLars Ellenberg sg_init_table(&sg, 1); 26245bb912bSLars Ellenberg crypto_hash_init(&desc); 26345bb912bSLars Ellenberg 26445bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) { 26545bb912bSLars Ellenberg /* all but the last page will be fully used */ 26645bb912bSLars Ellenberg sg_set_page(&sg, page, PAGE_SIZE, 0); 26745bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 26845bb912bSLars Ellenberg page = tmp; 26945bb912bSLars Ellenberg } 27045bb912bSLars Ellenberg /* and now the last, possibly only partially used page */ 271010f6e67SAndreas Gruenbacher len = e->i.size & (PAGE_SIZE - 1); 27245bb912bSLars Ellenberg sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); 27345bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 27445bb912bSLars Ellenberg crypto_hash_final(&desc, digest); 27545bb912bSLars Ellenberg } 27645bb912bSLars Ellenberg 27745bb912bSLars Ellenberg void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) 278b411b363SPhilipp Reisner { 279b411b363SPhilipp Reisner struct hash_desc desc; 280b411b363SPhilipp Reisner struct scatterlist sg; 281b411b363SPhilipp Reisner struct bio_vec *bvec; 282b411b363SPhilipp Reisner int i; 283b411b363SPhilipp Reisner 284b411b363SPhilipp Reisner desc.tfm = tfm; 285b411b363SPhilipp Reisner desc.flags = 0; 286b411b363SPhilipp Reisner 287b411b363SPhilipp Reisner sg_init_table(&sg, 1); 288b411b363SPhilipp Reisner crypto_hash_init(&desc); 289b411b363SPhilipp Reisner 290b411b363SPhilipp Reisner __bio_for_each_segment(bvec, bio, i, 0) { 291b411b363SPhilipp Reisner sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); 292b411b363SPhilipp Reisner crypto_hash_update(&desc, &sg, sg.length); 293b411b363SPhilipp Reisner } 294b411b363SPhilipp Reisner crypto_hash_final(&desc, digest); 295b411b363SPhilipp Reisner } 296b411b363SPhilipp Reisner 29753ea4331SLars Ellenberg /* TODO merge common code with w_e_end_ov_req */ 29853ea4331SLars Ellenberg int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 299b411b363SPhilipp Reisner { 300b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 301b411b363SPhilipp Reisner int digest_size; 302b411b363SPhilipp Reisner void *digest; 30353ea4331SLars Ellenberg int ok = 1; 304b411b363SPhilipp Reisner 30553ea4331SLars Ellenberg if (unlikely(cancel)) 30653ea4331SLars Ellenberg goto out; 307b411b363SPhilipp Reisner 30853ea4331SLars Ellenberg if (likely((e->flags & EE_WAS_ERROR) != 0)) 30953ea4331SLars Ellenberg goto out; 31053ea4331SLars Ellenberg 311b411b363SPhilipp Reisner digest_size = crypto_hash_digestsize(mdev->csums_tfm); 312b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 313b411b363SPhilipp Reisner if (digest) { 314010f6e67SAndreas Gruenbacher sector_t sector = e->i.sector; 315010f6e67SAndreas Gruenbacher unsigned int size = e->i.size; 31645bb912bSLars Ellenberg drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); 31753ea4331SLars Ellenberg /* Free e and pages before send. 31853ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 31953ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 32053ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 32153ea4331SLars Ellenberg * drbd_pp_alloc due to pp_in_use > max_buffers. */ 32253ea4331SLars Ellenberg drbd_free_ee(mdev, e); 32353ea4331SLars Ellenberg e = NULL; 324b411b363SPhilipp Reisner inc_rs_pending(mdev); 32553ea4331SLars Ellenberg ok = drbd_send_drequest_csum(mdev, sector, size, 32653ea4331SLars Ellenberg digest, digest_size, 327b411b363SPhilipp Reisner P_CSUM_RS_REQUEST); 328b411b363SPhilipp Reisner kfree(digest); 329b411b363SPhilipp Reisner } else { 330b411b363SPhilipp Reisner dev_err(DEV, "kmalloc() of digest failed.\n"); 331b411b363SPhilipp Reisner ok = 0; 332b411b363SPhilipp Reisner } 333b411b363SPhilipp Reisner 33453ea4331SLars Ellenberg out: 33553ea4331SLars Ellenberg if (e) 336b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 337b411b363SPhilipp Reisner 338b411b363SPhilipp Reisner if (unlikely(!ok)) 339b411b363SPhilipp Reisner dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); 340b411b363SPhilipp Reisner return ok; 341b411b363SPhilipp Reisner } 342b411b363SPhilipp Reisner 343b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 344b411b363SPhilipp Reisner 345b411b363SPhilipp Reisner static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) 346b411b363SPhilipp Reisner { 347b411b363SPhilipp Reisner struct drbd_epoch_entry *e; 348b411b363SPhilipp Reisner 349b411b363SPhilipp Reisner if (!get_ldev(mdev)) 35080a40e43SLars Ellenberg return -EIO; 351b411b363SPhilipp Reisner 352e3555d85SPhilipp Reisner if (drbd_rs_should_slow_down(mdev, sector)) 3530f0601f4SLars Ellenberg goto defer; 3540f0601f4SLars Ellenberg 355b411b363SPhilipp Reisner /* GFP_TRY, because if there is no memory available right now, this may 356b411b363SPhilipp Reisner * be rescheduled for later. It is "only" background resync, after all. */ 3579a8e7753SAndreas Gruenbacher e = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); 35845bb912bSLars Ellenberg if (!e) 35980a40e43SLars Ellenberg goto defer; 360b411b363SPhilipp Reisner 36180a40e43SLars Ellenberg e->w.cb = w_e_send_csum; 36287eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 363b411b363SPhilipp Reisner list_add(&e->w.list, &mdev->read_ee); 36487eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 365b411b363SPhilipp Reisner 3660f0601f4SLars Ellenberg atomic_add(size >> 9, &mdev->rs_sect_ev); 36745bb912bSLars Ellenberg if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) 36880a40e43SLars Ellenberg return 0; 36945bb912bSLars Ellenberg 37010f6d992SLars Ellenberg /* If it failed because of ENOMEM, retry should help. If it failed 37110f6d992SLars Ellenberg * because bio_add_page failed (probably broken lower level driver), 37210f6d992SLars Ellenberg * retry may or may not help. 37310f6d992SLars Ellenberg * If it does not, you may need to force disconnect. */ 37487eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 37522cc37a9SLars Ellenberg list_del(&e->w.list); 37687eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 37722cc37a9SLars Ellenberg 37845bb912bSLars Ellenberg drbd_free_ee(mdev, e); 37980a40e43SLars Ellenberg defer: 38045bb912bSLars Ellenberg put_ldev(mdev); 38180a40e43SLars Ellenberg return -EAGAIN; 382b411b363SPhilipp Reisner } 383b411b363SPhilipp Reisner 384794abb75SPhilipp Reisner int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 385794abb75SPhilipp Reisner { 386794abb75SPhilipp Reisner switch (mdev->state.conn) { 387794abb75SPhilipp Reisner case C_VERIFY_S: 388794abb75SPhilipp Reisner w_make_ov_request(mdev, w, cancel); 389794abb75SPhilipp Reisner break; 390794abb75SPhilipp Reisner case C_SYNC_TARGET: 391794abb75SPhilipp Reisner w_make_resync_request(mdev, w, cancel); 392794abb75SPhilipp Reisner break; 393794abb75SPhilipp Reisner } 394794abb75SPhilipp Reisner 395794abb75SPhilipp Reisner return 1; 396794abb75SPhilipp Reisner } 397794abb75SPhilipp Reisner 398b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data) 399b411b363SPhilipp Reisner { 400b411b363SPhilipp Reisner struct drbd_conf *mdev = (struct drbd_conf *) data; 401b411b363SPhilipp Reisner 402794abb75SPhilipp Reisner if (list_empty(&mdev->resync_work.list)) 403e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work); 404b411b363SPhilipp Reisner } 405b411b363SPhilipp Reisner 406778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value) 407778f271dSPhilipp Reisner { 408778f271dSPhilipp Reisner int i; 409778f271dSPhilipp Reisner 410778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 411f10f2623SPhilipp Reisner fb->values[i] = value; 412778f271dSPhilipp Reisner } 413778f271dSPhilipp Reisner 414778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value) 415778f271dSPhilipp Reisner { 416778f271dSPhilipp Reisner int ov; 417778f271dSPhilipp Reisner 418778f271dSPhilipp Reisner ov = fb->values[fb->head_index]; 419778f271dSPhilipp Reisner fb->values[fb->head_index++] = value; 420778f271dSPhilipp Reisner 421778f271dSPhilipp Reisner if (fb->head_index >= fb->size) 422778f271dSPhilipp Reisner fb->head_index = 0; 423778f271dSPhilipp Reisner 424778f271dSPhilipp Reisner return ov; 425778f271dSPhilipp Reisner } 426778f271dSPhilipp Reisner 427778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value) 428778f271dSPhilipp Reisner { 429778f271dSPhilipp Reisner int i; 430778f271dSPhilipp Reisner 431778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 432778f271dSPhilipp Reisner fb->values[i] += value; 433778f271dSPhilipp Reisner } 434778f271dSPhilipp Reisner 4359d77a5feSPhilipp Reisner static int drbd_rs_controller(struct drbd_conf *mdev) 436778f271dSPhilipp Reisner { 437778f271dSPhilipp Reisner unsigned int sect_in; /* Number of sectors that came in since the last turn */ 438778f271dSPhilipp Reisner unsigned int want; /* The number of sectors we want in the proxy */ 439778f271dSPhilipp Reisner int req_sect; /* Number of sectors to request in this turn */ 440778f271dSPhilipp Reisner int correction; /* Number of sectors more we need in the proxy*/ 441778f271dSPhilipp Reisner int cps; /* correction per invocation of drbd_rs_controller() */ 442778f271dSPhilipp Reisner int steps; /* Number of time steps to plan ahead */ 443778f271dSPhilipp Reisner int curr_corr; 444778f271dSPhilipp Reisner int max_sect; 445778f271dSPhilipp Reisner 446778f271dSPhilipp Reisner sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ 447778f271dSPhilipp Reisner mdev->rs_in_flight -= sect_in; 448778f271dSPhilipp Reisner 449778f271dSPhilipp Reisner spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ 450778f271dSPhilipp Reisner 451778f271dSPhilipp Reisner steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ 452778f271dSPhilipp Reisner 453778f271dSPhilipp Reisner if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ 454778f271dSPhilipp Reisner want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; 455778f271dSPhilipp Reisner } else { /* normal path */ 456778f271dSPhilipp Reisner want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : 457778f271dSPhilipp Reisner sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); 458778f271dSPhilipp Reisner } 459778f271dSPhilipp Reisner 460778f271dSPhilipp Reisner correction = want - mdev->rs_in_flight - mdev->rs_planed; 461778f271dSPhilipp Reisner 462778f271dSPhilipp Reisner /* Plan ahead */ 463778f271dSPhilipp Reisner cps = correction / steps; 464778f271dSPhilipp Reisner fifo_add_val(&mdev->rs_plan_s, cps); 465778f271dSPhilipp Reisner mdev->rs_planed += cps * steps; 466778f271dSPhilipp Reisner 467778f271dSPhilipp Reisner /* What we do in this step */ 468778f271dSPhilipp Reisner curr_corr = fifo_push(&mdev->rs_plan_s, 0); 469778f271dSPhilipp Reisner spin_unlock(&mdev->peer_seq_lock); 470778f271dSPhilipp Reisner mdev->rs_planed -= curr_corr; 471778f271dSPhilipp Reisner 472778f271dSPhilipp Reisner req_sect = sect_in + curr_corr; 473778f271dSPhilipp Reisner if (req_sect < 0) 474778f271dSPhilipp Reisner req_sect = 0; 475778f271dSPhilipp Reisner 476778f271dSPhilipp Reisner max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; 477778f271dSPhilipp Reisner if (req_sect > max_sect) 478778f271dSPhilipp Reisner req_sect = max_sect; 479778f271dSPhilipp Reisner 480778f271dSPhilipp Reisner /* 481778f271dSPhilipp Reisner dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", 482778f271dSPhilipp Reisner sect_in, mdev->rs_in_flight, want, correction, 483778f271dSPhilipp Reisner steps, cps, mdev->rs_planed, curr_corr, req_sect); 484778f271dSPhilipp Reisner */ 485778f271dSPhilipp Reisner 486778f271dSPhilipp Reisner return req_sect; 487778f271dSPhilipp Reisner } 488778f271dSPhilipp Reisner 4899d77a5feSPhilipp Reisner static int drbd_rs_number_requests(struct drbd_conf *mdev) 490e65f440dSLars Ellenberg { 491e65f440dSLars Ellenberg int number; 492e65f440dSLars Ellenberg if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ 493e65f440dSLars Ellenberg number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); 494e65f440dSLars Ellenberg mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; 495e65f440dSLars Ellenberg } else { 496e65f440dSLars Ellenberg mdev->c_sync_rate = mdev->sync_conf.rate; 497e65f440dSLars Ellenberg number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); 498e65f440dSLars Ellenberg } 499e65f440dSLars Ellenberg 500e65f440dSLars Ellenberg /* ignore the amount of pending requests, the resync controller should 501e65f440dSLars Ellenberg * throttle down to incoming reply rate soon enough anyways. */ 502e65f440dSLars Ellenberg return number; 503e65f440dSLars Ellenberg } 504e65f440dSLars Ellenberg 5059d77a5feSPhilipp Reisner static int w_make_resync_request(struct drbd_conf *mdev, 506b411b363SPhilipp Reisner struct drbd_work *w, int cancel) 507b411b363SPhilipp Reisner { 508b411b363SPhilipp Reisner unsigned long bit; 509b411b363SPhilipp Reisner sector_t sector; 510b411b363SPhilipp Reisner const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 5111816a2b4SLars Ellenberg int max_bio_size; 512e65f440dSLars Ellenberg int number, rollback_i, size; 513b411b363SPhilipp Reisner int align, queued, sndbuf; 5140f0601f4SLars Ellenberg int i = 0; 515b411b363SPhilipp Reisner 516b411b363SPhilipp Reisner if (unlikely(cancel)) 517b411b363SPhilipp Reisner return 1; 518b411b363SPhilipp Reisner 519af85e8e8SLars Ellenberg if (mdev->rs_total == 0) { 520af85e8e8SLars Ellenberg /* empty resync? */ 521af85e8e8SLars Ellenberg drbd_resync_finished(mdev); 522af85e8e8SLars Ellenberg return 1; 523af85e8e8SLars Ellenberg } 524af85e8e8SLars Ellenberg 525b411b363SPhilipp Reisner if (!get_ldev(mdev)) { 526b411b363SPhilipp Reisner /* Since we only need to access mdev->rsync a 527b411b363SPhilipp Reisner get_ldev_if_state(mdev,D_FAILED) would be sufficient, but 528b411b363SPhilipp Reisner to continue resync with a broken disk makes no sense at 529b411b363SPhilipp Reisner all */ 530b411b363SPhilipp Reisner dev_err(DEV, "Disk broke down during resync!\n"); 531b411b363SPhilipp Reisner return 1; 532b411b363SPhilipp Reisner } 533b411b363SPhilipp Reisner 5340cfdd247SPhilipp Reisner max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; 535e65f440dSLars Ellenberg number = drbd_rs_number_requests(mdev); 536e65f440dSLars Ellenberg if (number == 0) 5370f0601f4SLars Ellenberg goto requeue; 538b411b363SPhilipp Reisner 539b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 540b411b363SPhilipp Reisner /* Stop generating RS requests, when half of the send buffer is filled */ 541e42325a5SPhilipp Reisner mutex_lock(&mdev->tconn->data.mutex); 542e42325a5SPhilipp Reisner if (mdev->tconn->data.socket) { 543e42325a5SPhilipp Reisner queued = mdev->tconn->data.socket->sk->sk_wmem_queued; 544e42325a5SPhilipp Reisner sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf; 545b411b363SPhilipp Reisner } else { 546b411b363SPhilipp Reisner queued = 1; 547b411b363SPhilipp Reisner sndbuf = 0; 548b411b363SPhilipp Reisner } 549e42325a5SPhilipp Reisner mutex_unlock(&mdev->tconn->data.mutex); 550b411b363SPhilipp Reisner if (queued > sndbuf / 2) 551b411b363SPhilipp Reisner goto requeue; 552b411b363SPhilipp Reisner 553b411b363SPhilipp Reisner next_sector: 554b411b363SPhilipp Reisner size = BM_BLOCK_SIZE; 555b411b363SPhilipp Reisner bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); 556b411b363SPhilipp Reisner 5574b0715f0SLars Ellenberg if (bit == DRBD_END_OF_BITMAP) { 558b411b363SPhilipp Reisner mdev->bm_resync_fo = drbd_bm_bits(mdev); 559b411b363SPhilipp Reisner put_ldev(mdev); 560b411b363SPhilipp Reisner return 1; 561b411b363SPhilipp Reisner } 562b411b363SPhilipp Reisner 563b411b363SPhilipp Reisner sector = BM_BIT_TO_SECT(bit); 564b411b363SPhilipp Reisner 565e3555d85SPhilipp Reisner if (drbd_rs_should_slow_down(mdev, sector) || 566e3555d85SPhilipp Reisner drbd_try_rs_begin_io(mdev, sector)) { 567b411b363SPhilipp Reisner mdev->bm_resync_fo = bit; 568b411b363SPhilipp Reisner goto requeue; 569b411b363SPhilipp Reisner } 570b411b363SPhilipp Reisner mdev->bm_resync_fo = bit + 1; 571b411b363SPhilipp Reisner 572b411b363SPhilipp Reisner if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) { 573b411b363SPhilipp Reisner drbd_rs_complete_io(mdev, sector); 574b411b363SPhilipp Reisner goto next_sector; 575b411b363SPhilipp Reisner } 576b411b363SPhilipp Reisner 5771816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE 578b411b363SPhilipp Reisner /* try to find some adjacent bits. 579b411b363SPhilipp Reisner * we stop if we have already the maximum req size. 580b411b363SPhilipp Reisner * 581b411b363SPhilipp Reisner * Additionally always align bigger requests, in order to 582b411b363SPhilipp Reisner * be prepared for all stripe sizes of software RAIDs. 583b411b363SPhilipp Reisner */ 584b411b363SPhilipp Reisner align = 1; 585d207450cSPhilipp Reisner rollback_i = i; 586b411b363SPhilipp Reisner for (;;) { 5871816a2b4SLars Ellenberg if (size + BM_BLOCK_SIZE > max_bio_size) 588b411b363SPhilipp Reisner break; 589b411b363SPhilipp Reisner 590b411b363SPhilipp Reisner /* Be always aligned */ 591b411b363SPhilipp Reisner if (sector & ((1<<(align+3))-1)) 592b411b363SPhilipp Reisner break; 593b411b363SPhilipp Reisner 594b411b363SPhilipp Reisner /* do not cross extent boundaries */ 595b411b363SPhilipp Reisner if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) 596b411b363SPhilipp Reisner break; 597b411b363SPhilipp Reisner /* now, is it actually dirty, after all? 598b411b363SPhilipp Reisner * caution, drbd_bm_test_bit is tri-state for some 599b411b363SPhilipp Reisner * obscure reason; ( b == 0 ) would get the out-of-band 600b411b363SPhilipp Reisner * only accidentally right because of the "oddly sized" 601b411b363SPhilipp Reisner * adjustment below */ 602b411b363SPhilipp Reisner if (drbd_bm_test_bit(mdev, bit+1) != 1) 603b411b363SPhilipp Reisner break; 604b411b363SPhilipp Reisner bit++; 605b411b363SPhilipp Reisner size += BM_BLOCK_SIZE; 606b411b363SPhilipp Reisner if ((BM_BLOCK_SIZE << align) <= size) 607b411b363SPhilipp Reisner align++; 608b411b363SPhilipp Reisner i++; 609b411b363SPhilipp Reisner } 610b411b363SPhilipp Reisner /* if we merged some, 611b411b363SPhilipp Reisner * reset the offset to start the next drbd_bm_find_next from */ 612b411b363SPhilipp Reisner if (size > BM_BLOCK_SIZE) 613b411b363SPhilipp Reisner mdev->bm_resync_fo = bit + 1; 614b411b363SPhilipp Reisner #endif 615b411b363SPhilipp Reisner 616b411b363SPhilipp Reisner /* adjust very last sectors, in case we are oddly sized */ 617b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 618b411b363SPhilipp Reisner size = (capacity-sector)<<9; 61931890f4aSPhilipp Reisner if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { 620b411b363SPhilipp Reisner switch (read_for_csum(mdev, sector, size)) { 62180a40e43SLars Ellenberg case -EIO: /* Disk failure */ 622b411b363SPhilipp Reisner put_ldev(mdev); 623b411b363SPhilipp Reisner return 0; 62480a40e43SLars Ellenberg case -EAGAIN: /* allocation failed, or ldev busy */ 625b411b363SPhilipp Reisner drbd_rs_complete_io(mdev, sector); 626b411b363SPhilipp Reisner mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); 627d207450cSPhilipp Reisner i = rollback_i; 628b411b363SPhilipp Reisner goto requeue; 62980a40e43SLars Ellenberg case 0: 63080a40e43SLars Ellenberg /* everything ok */ 63180a40e43SLars Ellenberg break; 63280a40e43SLars Ellenberg default: 63380a40e43SLars Ellenberg BUG(); 634b411b363SPhilipp Reisner } 635b411b363SPhilipp Reisner } else { 636b411b363SPhilipp Reisner inc_rs_pending(mdev); 637b411b363SPhilipp Reisner if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST, 638b411b363SPhilipp Reisner sector, size, ID_SYNCER)) { 639b411b363SPhilipp Reisner dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); 640b411b363SPhilipp Reisner dec_rs_pending(mdev); 641b411b363SPhilipp Reisner put_ldev(mdev); 642b411b363SPhilipp Reisner return 0; 643b411b363SPhilipp Reisner } 644b411b363SPhilipp Reisner } 645b411b363SPhilipp Reisner } 646b411b363SPhilipp Reisner 647b411b363SPhilipp Reisner if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) { 648b411b363SPhilipp Reisner /* last syncer _request_ was sent, 649b411b363SPhilipp Reisner * but the P_RS_DATA_REPLY not yet received. sync will end (and 650b411b363SPhilipp Reisner * next sync group will resume), as soon as we receive the last 651b411b363SPhilipp Reisner * resync data block, and the last bit is cleared. 652b411b363SPhilipp Reisner * until then resync "work" is "inactive" ... 653b411b363SPhilipp Reisner */ 654b411b363SPhilipp Reisner put_ldev(mdev); 655b411b363SPhilipp Reisner return 1; 656b411b363SPhilipp Reisner } 657b411b363SPhilipp Reisner 658b411b363SPhilipp Reisner requeue: 659778f271dSPhilipp Reisner mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 660b411b363SPhilipp Reisner mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 661b411b363SPhilipp Reisner put_ldev(mdev); 662b411b363SPhilipp Reisner return 1; 663b411b363SPhilipp Reisner } 664b411b363SPhilipp Reisner 665b411b363SPhilipp Reisner static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 666b411b363SPhilipp Reisner { 667b411b363SPhilipp Reisner int number, i, size; 668b411b363SPhilipp Reisner sector_t sector; 669b411b363SPhilipp Reisner const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 670b411b363SPhilipp Reisner 671b411b363SPhilipp Reisner if (unlikely(cancel)) 672b411b363SPhilipp Reisner return 1; 673b411b363SPhilipp Reisner 6742649f080SLars Ellenberg number = drbd_rs_number_requests(mdev); 675b411b363SPhilipp Reisner 676b411b363SPhilipp Reisner sector = mdev->ov_position; 677b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 678b411b363SPhilipp Reisner if (sector >= capacity) { 679b411b363SPhilipp Reisner return 1; 680b411b363SPhilipp Reisner } 681b411b363SPhilipp Reisner 682b411b363SPhilipp Reisner size = BM_BLOCK_SIZE; 683b411b363SPhilipp Reisner 684e3555d85SPhilipp Reisner if (drbd_rs_should_slow_down(mdev, sector) || 685e3555d85SPhilipp Reisner drbd_try_rs_begin_io(mdev, sector)) { 686b411b363SPhilipp Reisner mdev->ov_position = sector; 687b411b363SPhilipp Reisner goto requeue; 688b411b363SPhilipp Reisner } 689b411b363SPhilipp Reisner 690b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 691b411b363SPhilipp Reisner size = (capacity-sector)<<9; 692b411b363SPhilipp Reisner 693b411b363SPhilipp Reisner inc_rs_pending(mdev); 694b411b363SPhilipp Reisner if (!drbd_send_ov_request(mdev, sector, size)) { 695b411b363SPhilipp Reisner dec_rs_pending(mdev); 696b411b363SPhilipp Reisner return 0; 697b411b363SPhilipp Reisner } 698b411b363SPhilipp Reisner sector += BM_SECT_PER_BIT; 699b411b363SPhilipp Reisner } 700b411b363SPhilipp Reisner mdev->ov_position = sector; 701b411b363SPhilipp Reisner 702b411b363SPhilipp Reisner requeue: 7032649f080SLars Ellenberg mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 704b411b363SPhilipp Reisner mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 705b411b363SPhilipp Reisner return 1; 706b411b363SPhilipp Reisner } 707b411b363SPhilipp Reisner 708c4752ef1SPhilipp Reisner 709370a43e7SPhilipp Reisner void start_resync_timer_fn(unsigned long data) 710370a43e7SPhilipp Reisner { 711370a43e7SPhilipp Reisner struct drbd_conf *mdev = (struct drbd_conf *) data; 712370a43e7SPhilipp Reisner 713e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); 714370a43e7SPhilipp Reisner } 715370a43e7SPhilipp Reisner 716c4752ef1SPhilipp Reisner int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 717c4752ef1SPhilipp Reisner { 718370a43e7SPhilipp Reisner if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { 719370a43e7SPhilipp Reisner dev_warn(DEV, "w_start_resync later...\n"); 720370a43e7SPhilipp Reisner mdev->start_resync_timer.expires = jiffies + HZ/10; 721370a43e7SPhilipp Reisner add_timer(&mdev->start_resync_timer); 722370a43e7SPhilipp Reisner return 1; 723370a43e7SPhilipp Reisner } 724c4752ef1SPhilipp Reisner 725370a43e7SPhilipp Reisner drbd_start_resync(mdev, C_SYNC_SOURCE); 726370a43e7SPhilipp Reisner clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); 727c4752ef1SPhilipp Reisner return 1; 728c4752ef1SPhilipp Reisner } 729c4752ef1SPhilipp Reisner 730b411b363SPhilipp Reisner int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 731b411b363SPhilipp Reisner { 732b411b363SPhilipp Reisner kfree(w); 733b411b363SPhilipp Reisner ov_oos_print(mdev); 734b411b363SPhilipp Reisner drbd_resync_finished(mdev); 735b411b363SPhilipp Reisner 736b411b363SPhilipp Reisner return 1; 737b411b363SPhilipp Reisner } 738b411b363SPhilipp Reisner 739b411b363SPhilipp Reisner static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 740b411b363SPhilipp Reisner { 741b411b363SPhilipp Reisner kfree(w); 742b411b363SPhilipp Reisner 743b411b363SPhilipp Reisner drbd_resync_finished(mdev); 744b411b363SPhilipp Reisner 745b411b363SPhilipp Reisner return 1; 746b411b363SPhilipp Reisner } 747b411b363SPhilipp Reisner 748af85e8e8SLars Ellenberg static void ping_peer(struct drbd_conf *mdev) 749af85e8e8SLars Ellenberg { 750af85e8e8SLars Ellenberg clear_bit(GOT_PING_ACK, &mdev->flags); 751af85e8e8SLars Ellenberg request_ping(mdev); 752af85e8e8SLars Ellenberg wait_event(mdev->misc_wait, 753af85e8e8SLars Ellenberg test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); 754af85e8e8SLars Ellenberg } 755af85e8e8SLars Ellenberg 756b411b363SPhilipp Reisner int drbd_resync_finished(struct drbd_conf *mdev) 757b411b363SPhilipp Reisner { 758b411b363SPhilipp Reisner unsigned long db, dt, dbdt; 759b411b363SPhilipp Reisner unsigned long n_oos; 760b411b363SPhilipp Reisner union drbd_state os, ns; 761b411b363SPhilipp Reisner struct drbd_work *w; 762b411b363SPhilipp Reisner char *khelper_cmd = NULL; 76326525618SLars Ellenberg int verify_done = 0; 764b411b363SPhilipp Reisner 765b411b363SPhilipp Reisner /* Remove all elements from the resync LRU. Since future actions 766b411b363SPhilipp Reisner * might set bits in the (main) bitmap, then the entries in the 767b411b363SPhilipp Reisner * resync LRU would be wrong. */ 768b411b363SPhilipp Reisner if (drbd_rs_del_all(mdev)) { 769b411b363SPhilipp Reisner /* In case this is not possible now, most probably because 770b411b363SPhilipp Reisner * there are P_RS_DATA_REPLY Packets lingering on the worker's 771b411b363SPhilipp Reisner * queue (or even the read operations for those packets 772b411b363SPhilipp Reisner * is not finished by now). Retry in 100ms. */ 773b411b363SPhilipp Reisner 77420ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ / 10); 775b411b363SPhilipp Reisner w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); 776b411b363SPhilipp Reisner if (w) { 777b411b363SPhilipp Reisner w->cb = w_resync_finished; 778e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, w); 779b411b363SPhilipp Reisner return 1; 780b411b363SPhilipp Reisner } 781b411b363SPhilipp Reisner dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); 782b411b363SPhilipp Reisner } 783b411b363SPhilipp Reisner 784b411b363SPhilipp Reisner dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; 785b411b363SPhilipp Reisner if (dt <= 0) 786b411b363SPhilipp Reisner dt = 1; 787b411b363SPhilipp Reisner db = mdev->rs_total; 788b411b363SPhilipp Reisner dbdt = Bit2KB(db/dt); 789b411b363SPhilipp Reisner mdev->rs_paused /= HZ; 790b411b363SPhilipp Reisner 791b411b363SPhilipp Reisner if (!get_ldev(mdev)) 792b411b363SPhilipp Reisner goto out; 793b411b363SPhilipp Reisner 794af85e8e8SLars Ellenberg ping_peer(mdev); 795af85e8e8SLars Ellenberg 79687eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 797b411b363SPhilipp Reisner os = mdev->state; 798b411b363SPhilipp Reisner 79926525618SLars Ellenberg verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); 80026525618SLars Ellenberg 801b411b363SPhilipp Reisner /* This protects us against multiple calls (that can happen in the presence 802b411b363SPhilipp Reisner of application IO), and against connectivity loss just before we arrive here. */ 803b411b363SPhilipp Reisner if (os.conn <= C_CONNECTED) 804b411b363SPhilipp Reisner goto out_unlock; 805b411b363SPhilipp Reisner 806b411b363SPhilipp Reisner ns = os; 807b411b363SPhilipp Reisner ns.conn = C_CONNECTED; 808b411b363SPhilipp Reisner 809b411b363SPhilipp Reisner dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 81026525618SLars Ellenberg verify_done ? "Online verify " : "Resync", 811b411b363SPhilipp Reisner dt + mdev->rs_paused, mdev->rs_paused, dbdt); 812b411b363SPhilipp Reisner 813b411b363SPhilipp Reisner n_oos = drbd_bm_total_weight(mdev); 814b411b363SPhilipp Reisner 815b411b363SPhilipp Reisner if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { 816b411b363SPhilipp Reisner if (n_oos) { 817b411b363SPhilipp Reisner dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n", 818b411b363SPhilipp Reisner n_oos, Bit2KB(1)); 819b411b363SPhilipp Reisner khelper_cmd = "out-of-sync"; 820b411b363SPhilipp Reisner } 821b411b363SPhilipp Reisner } else { 822b411b363SPhilipp Reisner D_ASSERT((n_oos - mdev->rs_failed) == 0); 823b411b363SPhilipp Reisner 824b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) 825b411b363SPhilipp Reisner khelper_cmd = "after-resync-target"; 826b411b363SPhilipp Reisner 827b411b363SPhilipp Reisner if (mdev->csums_tfm && mdev->rs_total) { 828b411b363SPhilipp Reisner const unsigned long s = mdev->rs_same_csum; 829b411b363SPhilipp Reisner const unsigned long t = mdev->rs_total; 830b411b363SPhilipp Reisner const int ratio = 831b411b363SPhilipp Reisner (t == 0) ? 0 : 832b411b363SPhilipp Reisner (t < 100000) ? ((s*100)/t) : (s/(t/100)); 833b411b363SPhilipp Reisner dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " 834b411b363SPhilipp Reisner "transferred %luK total %luK\n", 835b411b363SPhilipp Reisner ratio, 836b411b363SPhilipp Reisner Bit2KB(mdev->rs_same_csum), 837b411b363SPhilipp Reisner Bit2KB(mdev->rs_total - mdev->rs_same_csum), 838b411b363SPhilipp Reisner Bit2KB(mdev->rs_total)); 839b411b363SPhilipp Reisner } 840b411b363SPhilipp Reisner } 841b411b363SPhilipp Reisner 842b411b363SPhilipp Reisner if (mdev->rs_failed) { 843b411b363SPhilipp Reisner dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed); 844b411b363SPhilipp Reisner 845b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 846b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 847b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 848b411b363SPhilipp Reisner } else { 849b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 850b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 851b411b363SPhilipp Reisner } 852b411b363SPhilipp Reisner } else { 853b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 854b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 855b411b363SPhilipp Reisner 856b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 857b411b363SPhilipp Reisner if (mdev->p_uuid) { 858b411b363SPhilipp Reisner int i; 859b411b363SPhilipp Reisner for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) 860b411b363SPhilipp Reisner _drbd_uuid_set(mdev, i, mdev->p_uuid[i]); 861b411b363SPhilipp Reisner drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]); 862b411b363SPhilipp Reisner _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]); 863b411b363SPhilipp Reisner } else { 864b411b363SPhilipp Reisner dev_err(DEV, "mdev->p_uuid is NULL! BUG\n"); 865b411b363SPhilipp Reisner } 866b411b363SPhilipp Reisner } 867b411b363SPhilipp Reisner 86862b0da3aSLars Ellenberg if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { 86962b0da3aSLars Ellenberg /* for verify runs, we don't update uuids here, 87062b0da3aSLars Ellenberg * so there would be nothing to report. */ 871b411b363SPhilipp Reisner drbd_uuid_set_bm(mdev, 0UL); 87262b0da3aSLars Ellenberg drbd_print_uuids(mdev, "updated UUIDs"); 873b411b363SPhilipp Reisner if (mdev->p_uuid) { 874b411b363SPhilipp Reisner /* Now the two UUID sets are equal, update what we 875b411b363SPhilipp Reisner * know of the peer. */ 876b411b363SPhilipp Reisner int i; 877b411b363SPhilipp Reisner for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) 878b411b363SPhilipp Reisner mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; 879b411b363SPhilipp Reisner } 880b411b363SPhilipp Reisner } 88162b0da3aSLars Ellenberg } 882b411b363SPhilipp Reisner 883b411b363SPhilipp Reisner _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); 884b411b363SPhilipp Reisner out_unlock: 88587eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 886b411b363SPhilipp Reisner put_ldev(mdev); 887b411b363SPhilipp Reisner out: 888b411b363SPhilipp Reisner mdev->rs_total = 0; 889b411b363SPhilipp Reisner mdev->rs_failed = 0; 890b411b363SPhilipp Reisner mdev->rs_paused = 0; 89126525618SLars Ellenberg if (verify_done) 892b411b363SPhilipp Reisner mdev->ov_start_sector = 0; 893b411b363SPhilipp Reisner 89413d42685SLars Ellenberg drbd_md_sync(mdev); 89513d42685SLars Ellenberg 896b411b363SPhilipp Reisner if (khelper_cmd) 897b411b363SPhilipp Reisner drbd_khelper(mdev, khelper_cmd); 898b411b363SPhilipp Reisner 899b411b363SPhilipp Reisner return 1; 900b411b363SPhilipp Reisner } 901b411b363SPhilipp Reisner 902b411b363SPhilipp Reisner /* helper */ 903b411b363SPhilipp Reisner static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) 904b411b363SPhilipp Reisner { 90545bb912bSLars Ellenberg if (drbd_ee_has_active_page(e)) { 906b411b363SPhilipp Reisner /* This might happen if sendpage() has not finished */ 907010f6e67SAndreas Gruenbacher int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; 908435f0740SLars Ellenberg atomic_add(i, &mdev->pp_in_use_by_net); 909435f0740SLars Ellenberg atomic_sub(i, &mdev->pp_in_use); 91087eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 911b411b363SPhilipp Reisner list_add_tail(&e->w.list, &mdev->net_ee); 91287eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 913435f0740SLars Ellenberg wake_up(&drbd_pp_wait); 914b411b363SPhilipp Reisner } else 915b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 916b411b363SPhilipp Reisner } 917b411b363SPhilipp Reisner 918b411b363SPhilipp Reisner /** 919b411b363SPhilipp Reisner * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST 920b411b363SPhilipp Reisner * @mdev: DRBD device. 921b411b363SPhilipp Reisner * @w: work object. 922b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 923b411b363SPhilipp Reisner */ 924b411b363SPhilipp Reisner int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 925b411b363SPhilipp Reisner { 926b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 927b411b363SPhilipp Reisner int ok; 928b411b363SPhilipp Reisner 929b411b363SPhilipp Reisner if (unlikely(cancel)) { 930b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 931b411b363SPhilipp Reisner dec_unacked(mdev); 932b411b363SPhilipp Reisner return 1; 933b411b363SPhilipp Reisner } 934b411b363SPhilipp Reisner 93545bb912bSLars Ellenberg if (likely((e->flags & EE_WAS_ERROR) == 0)) { 936b411b363SPhilipp Reisner ok = drbd_send_block(mdev, P_DATA_REPLY, e); 937b411b363SPhilipp Reisner } else { 938b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 939b411b363SPhilipp Reisner dev_err(DEV, "Sending NegDReply. sector=%llus.\n", 940010f6e67SAndreas Gruenbacher (unsigned long long)e->i.sector); 941b411b363SPhilipp Reisner 942b411b363SPhilipp Reisner ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); 943b411b363SPhilipp Reisner } 944b411b363SPhilipp Reisner 945b411b363SPhilipp Reisner dec_unacked(mdev); 946b411b363SPhilipp Reisner 947b411b363SPhilipp Reisner move_to_net_ee_or_free(mdev, e); 948b411b363SPhilipp Reisner 949b411b363SPhilipp Reisner if (unlikely(!ok)) 950b411b363SPhilipp Reisner dev_err(DEV, "drbd_send_block() failed\n"); 951b411b363SPhilipp Reisner return ok; 952b411b363SPhilipp Reisner } 953b411b363SPhilipp Reisner 954b411b363SPhilipp Reisner /** 955b411b363SPhilipp Reisner * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS 956b411b363SPhilipp Reisner * @mdev: DRBD device. 957b411b363SPhilipp Reisner * @w: work object. 958b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 959b411b363SPhilipp Reisner */ 960b411b363SPhilipp Reisner int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 961b411b363SPhilipp Reisner { 962b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 963b411b363SPhilipp Reisner int ok; 964b411b363SPhilipp Reisner 965b411b363SPhilipp Reisner if (unlikely(cancel)) { 966b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 967b411b363SPhilipp Reisner dec_unacked(mdev); 968b411b363SPhilipp Reisner return 1; 969b411b363SPhilipp Reisner } 970b411b363SPhilipp Reisner 971b411b363SPhilipp Reisner if (get_ldev_if_state(mdev, D_FAILED)) { 972010f6e67SAndreas Gruenbacher drbd_rs_complete_io(mdev, e->i.sector); 973b411b363SPhilipp Reisner put_ldev(mdev); 974b411b363SPhilipp Reisner } 975b411b363SPhilipp Reisner 976d612d309SPhilipp Reisner if (mdev->state.conn == C_AHEAD) { 977d612d309SPhilipp Reisner ok = drbd_send_ack(mdev, P_RS_CANCEL, e); 978d612d309SPhilipp Reisner } else if (likely((e->flags & EE_WAS_ERROR) == 0)) { 979b411b363SPhilipp Reisner if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { 980b411b363SPhilipp Reisner inc_rs_pending(mdev); 981b411b363SPhilipp Reisner ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); 982b411b363SPhilipp Reisner } else { 983b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 984b411b363SPhilipp Reisner dev_err(DEV, "Not sending RSDataReply, " 985b411b363SPhilipp Reisner "partner DISKLESS!\n"); 986b411b363SPhilipp Reisner ok = 1; 987b411b363SPhilipp Reisner } 988b411b363SPhilipp Reisner } else { 989b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 990b411b363SPhilipp Reisner dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", 991010f6e67SAndreas Gruenbacher (unsigned long long)e->i.sector); 992b411b363SPhilipp Reisner 993b411b363SPhilipp Reisner ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); 994b411b363SPhilipp Reisner 995b411b363SPhilipp Reisner /* update resync data with failure */ 996010f6e67SAndreas Gruenbacher drbd_rs_failed_io(mdev, e->i.sector, e->i.size); 997b411b363SPhilipp Reisner } 998b411b363SPhilipp Reisner 999b411b363SPhilipp Reisner dec_unacked(mdev); 1000b411b363SPhilipp Reisner 1001b411b363SPhilipp Reisner move_to_net_ee_or_free(mdev, e); 1002b411b363SPhilipp Reisner 1003b411b363SPhilipp Reisner if (unlikely(!ok)) 1004b411b363SPhilipp Reisner dev_err(DEV, "drbd_send_block() failed\n"); 1005b411b363SPhilipp Reisner return ok; 1006b411b363SPhilipp Reisner } 1007b411b363SPhilipp Reisner 1008b411b363SPhilipp Reisner int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1009b411b363SPhilipp Reisner { 1010b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 1011b411b363SPhilipp Reisner struct digest_info *di; 1012b411b363SPhilipp Reisner int digest_size; 1013b411b363SPhilipp Reisner void *digest = NULL; 1014b411b363SPhilipp Reisner int ok, eq = 0; 1015b411b363SPhilipp Reisner 1016b411b363SPhilipp Reisner if (unlikely(cancel)) { 1017b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 1018b411b363SPhilipp Reisner dec_unacked(mdev); 1019b411b363SPhilipp Reisner return 1; 1020b411b363SPhilipp Reisner } 1021b411b363SPhilipp Reisner 10221d53f09eSLars Ellenberg if (get_ldev(mdev)) { 1023010f6e67SAndreas Gruenbacher drbd_rs_complete_io(mdev, e->i.sector); 10241d53f09eSLars Ellenberg put_ldev(mdev); 10251d53f09eSLars Ellenberg } 1026b411b363SPhilipp Reisner 102785719573SPhilipp Reisner di = e->digest; 1028b411b363SPhilipp Reisner 102945bb912bSLars Ellenberg if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1030b411b363SPhilipp Reisner /* quick hack to try to avoid a race against reconfiguration. 1031b411b363SPhilipp Reisner * a real fix would be much more involved, 1032b411b363SPhilipp Reisner * introducing more locking mechanisms */ 1033b411b363SPhilipp Reisner if (mdev->csums_tfm) { 1034b411b363SPhilipp Reisner digest_size = crypto_hash_digestsize(mdev->csums_tfm); 1035b411b363SPhilipp Reisner D_ASSERT(digest_size == di->digest_size); 1036b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1037b411b363SPhilipp Reisner } 1038b411b363SPhilipp Reisner if (digest) { 103945bb912bSLars Ellenberg drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); 1040b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1041b411b363SPhilipp Reisner kfree(digest); 1042b411b363SPhilipp Reisner } 1043b411b363SPhilipp Reisner 1044b411b363SPhilipp Reisner if (eq) { 1045010f6e67SAndreas Gruenbacher drbd_set_in_sync(mdev, e->i.sector, e->i.size); 1046676396d5SLars Ellenberg /* rs_same_csums unit is BM_BLOCK_SIZE */ 1047010f6e67SAndreas Gruenbacher mdev->rs_same_csum += e->i.size >> BM_BLOCK_SHIFT; 1048b411b363SPhilipp Reisner ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); 1049b411b363SPhilipp Reisner } else { 1050b411b363SPhilipp Reisner inc_rs_pending(mdev); 1051204bba99SPhilipp Reisner e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ 1052204bba99SPhilipp Reisner e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */ 1053204bba99SPhilipp Reisner kfree(di); 1054b411b363SPhilipp Reisner ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); 1055b411b363SPhilipp Reisner } 1056b411b363SPhilipp Reisner } else { 1057b411b363SPhilipp Reisner ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); 1058b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1059b411b363SPhilipp Reisner dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); 1060b411b363SPhilipp Reisner } 1061b411b363SPhilipp Reisner 1062b411b363SPhilipp Reisner dec_unacked(mdev); 1063b411b363SPhilipp Reisner move_to_net_ee_or_free(mdev, e); 1064b411b363SPhilipp Reisner 1065b411b363SPhilipp Reisner if (unlikely(!ok)) 1066b411b363SPhilipp Reisner dev_err(DEV, "drbd_send_block/ack() failed\n"); 1067b411b363SPhilipp Reisner return ok; 1068b411b363SPhilipp Reisner } 1069b411b363SPhilipp Reisner 107053ea4331SLars Ellenberg /* TODO merge common code with w_e_send_csum */ 1071b411b363SPhilipp Reisner int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1072b411b363SPhilipp Reisner { 1073b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 1074010f6e67SAndreas Gruenbacher sector_t sector = e->i.sector; 1075010f6e67SAndreas Gruenbacher unsigned int size = e->i.size; 1076b411b363SPhilipp Reisner int digest_size; 1077b411b363SPhilipp Reisner void *digest; 1078b411b363SPhilipp Reisner int ok = 1; 1079b411b363SPhilipp Reisner 1080b411b363SPhilipp Reisner if (unlikely(cancel)) 1081b411b363SPhilipp Reisner goto out; 1082b411b363SPhilipp Reisner 1083b411b363SPhilipp Reisner digest_size = crypto_hash_digestsize(mdev->verify_tfm); 1084b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 10858f21420eSPhilipp Reisner if (!digest) { 10868f21420eSPhilipp Reisner ok = 0; /* terminate the connection in case the allocation failed */ 10878f21420eSPhilipp Reisner goto out; 10888f21420eSPhilipp Reisner } 10898f21420eSPhilipp Reisner 10908f21420eSPhilipp Reisner if (likely(!(e->flags & EE_WAS_ERROR))) 109145bb912bSLars Ellenberg drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); 10928f21420eSPhilipp Reisner else 10938f21420eSPhilipp Reisner memset(digest, 0, digest_size); 10948f21420eSPhilipp Reisner 109553ea4331SLars Ellenberg /* Free e and pages before send. 109653ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 109753ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 109853ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 109953ea4331SLars Ellenberg * drbd_pp_alloc due to pp_in_use > max_buffers. */ 110053ea4331SLars Ellenberg drbd_free_ee(mdev, e); 110153ea4331SLars Ellenberg e = NULL; 1102b411b363SPhilipp Reisner inc_rs_pending(mdev); 110353ea4331SLars Ellenberg ok = drbd_send_drequest_csum(mdev, sector, size, 110453ea4331SLars Ellenberg digest, digest_size, 110553ea4331SLars Ellenberg P_OV_REPLY); 1106b411b363SPhilipp Reisner if (!ok) 1107b411b363SPhilipp Reisner dec_rs_pending(mdev); 1108b411b363SPhilipp Reisner kfree(digest); 1109b411b363SPhilipp Reisner 1110b411b363SPhilipp Reisner out: 111153ea4331SLars Ellenberg if (e) 1112b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 1113b411b363SPhilipp Reisner dec_unacked(mdev); 1114b411b363SPhilipp Reisner return ok; 1115b411b363SPhilipp Reisner } 1116b411b363SPhilipp Reisner 1117b411b363SPhilipp Reisner void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) 1118b411b363SPhilipp Reisner { 1119b411b363SPhilipp Reisner if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { 1120b411b363SPhilipp Reisner mdev->ov_last_oos_size += size>>9; 1121b411b363SPhilipp Reisner } else { 1122b411b363SPhilipp Reisner mdev->ov_last_oos_start = sector; 1123b411b363SPhilipp Reisner mdev->ov_last_oos_size = size>>9; 1124b411b363SPhilipp Reisner } 1125b411b363SPhilipp Reisner drbd_set_out_of_sync(mdev, sector, size); 1126b411b363SPhilipp Reisner } 1127b411b363SPhilipp Reisner 1128b411b363SPhilipp Reisner int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1129b411b363SPhilipp Reisner { 1130b411b363SPhilipp Reisner struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); 1131b411b363SPhilipp Reisner struct digest_info *di; 1132b411b363SPhilipp Reisner void *digest; 1133010f6e67SAndreas Gruenbacher sector_t sector = e->i.sector; 1134010f6e67SAndreas Gruenbacher unsigned int size = e->i.size; 113553ea4331SLars Ellenberg int digest_size; 1136b411b363SPhilipp Reisner int ok, eq = 0; 1137b411b363SPhilipp Reisner 1138b411b363SPhilipp Reisner if (unlikely(cancel)) { 1139b411b363SPhilipp Reisner drbd_free_ee(mdev, e); 1140b411b363SPhilipp Reisner dec_unacked(mdev); 1141b411b363SPhilipp Reisner return 1; 1142b411b363SPhilipp Reisner } 1143b411b363SPhilipp Reisner 1144b411b363SPhilipp Reisner /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 1145b411b363SPhilipp Reisner * the resync lru has been cleaned up already */ 11461d53f09eSLars Ellenberg if (get_ldev(mdev)) { 1147010f6e67SAndreas Gruenbacher drbd_rs_complete_io(mdev, e->i.sector); 11481d53f09eSLars Ellenberg put_ldev(mdev); 11491d53f09eSLars Ellenberg } 1150b411b363SPhilipp Reisner 115185719573SPhilipp Reisner di = e->digest; 1152b411b363SPhilipp Reisner 115345bb912bSLars Ellenberg if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1154b411b363SPhilipp Reisner digest_size = crypto_hash_digestsize(mdev->verify_tfm); 1155b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1156b411b363SPhilipp Reisner if (digest) { 115745bb912bSLars Ellenberg drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); 1158b411b363SPhilipp Reisner 1159b411b363SPhilipp Reisner D_ASSERT(digest_size == di->digest_size); 1160b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1161b411b363SPhilipp Reisner kfree(digest); 1162b411b363SPhilipp Reisner } 1163b411b363SPhilipp Reisner } 1164b411b363SPhilipp Reisner 116553ea4331SLars Ellenberg /* Free e and pages before send. 116653ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 116753ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 116853ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 116953ea4331SLars Ellenberg * drbd_pp_alloc due to pp_in_use > max_buffers. */ 117053ea4331SLars Ellenberg drbd_free_ee(mdev, e); 1171b411b363SPhilipp Reisner if (!eq) 117253ea4331SLars Ellenberg drbd_ov_oos_found(mdev, sector, size); 1173b411b363SPhilipp Reisner else 1174b411b363SPhilipp Reisner ov_oos_print(mdev); 1175b411b363SPhilipp Reisner 117653ea4331SLars Ellenberg ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, 1177b411b363SPhilipp Reisner eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); 1178b411b363SPhilipp Reisner 117953ea4331SLars Ellenberg dec_unacked(mdev); 1180b411b363SPhilipp Reisner 1181ea5442afSLars Ellenberg --mdev->ov_left; 1182ea5442afSLars Ellenberg 1183ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 1184ea5442afSLars Ellenberg if ((mdev->ov_left & 0x200) == 0x200) 1185ea5442afSLars Ellenberg drbd_advance_rs_marks(mdev, mdev->ov_left); 1186ea5442afSLars Ellenberg 1187ea5442afSLars Ellenberg if (mdev->ov_left == 0) { 1188b411b363SPhilipp Reisner ov_oos_print(mdev); 1189b411b363SPhilipp Reisner drbd_resync_finished(mdev); 1190b411b363SPhilipp Reisner } 1191b411b363SPhilipp Reisner 1192b411b363SPhilipp Reisner return ok; 1193b411b363SPhilipp Reisner } 1194b411b363SPhilipp Reisner 1195b411b363SPhilipp Reisner int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1196b411b363SPhilipp Reisner { 1197b411b363SPhilipp Reisner struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); 1198b411b363SPhilipp Reisner complete(&b->done); 1199b411b363SPhilipp Reisner return 1; 1200b411b363SPhilipp Reisner } 1201b411b363SPhilipp Reisner 1202b411b363SPhilipp Reisner int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1203b411b363SPhilipp Reisner { 1204b411b363SPhilipp Reisner struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); 1205e42325a5SPhilipp Reisner struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; 1206b411b363SPhilipp Reisner int ok = 1; 1207b411b363SPhilipp Reisner 1208b411b363SPhilipp Reisner /* really avoid racing with tl_clear. w.cb may have been referenced 1209b411b363SPhilipp Reisner * just before it was reassigned and re-queued, so double check that. 1210b411b363SPhilipp Reisner * actually, this race was harmless, since we only try to send the 1211b411b363SPhilipp Reisner * barrier packet here, and otherwise do nothing with the object. 1212b411b363SPhilipp Reisner * but compare with the head of w_clear_epoch */ 121387eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 1214b411b363SPhilipp Reisner if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) 1215b411b363SPhilipp Reisner cancel = 1; 121687eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 1217b411b363SPhilipp Reisner if (cancel) 1218b411b363SPhilipp Reisner return 1; 1219b411b363SPhilipp Reisner 1220b411b363SPhilipp Reisner if (!drbd_get_data_sock(mdev)) 1221b411b363SPhilipp Reisner return 0; 1222b411b363SPhilipp Reisner p->barrier = b->br_number; 1223b411b363SPhilipp Reisner /* inc_ap_pending was done where this was queued. 1224b411b363SPhilipp Reisner * dec_ap_pending will be done in got_BarrierAck 1225b411b363SPhilipp Reisner * or (on connection loss) in w_clear_epoch. */ 1226e42325a5SPhilipp Reisner ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, 1227c012949aSPhilipp Reisner &p->head, sizeof(*p), 0); 1228b411b363SPhilipp Reisner drbd_put_data_sock(mdev); 1229b411b363SPhilipp Reisner 1230b411b363SPhilipp Reisner return ok; 1231b411b363SPhilipp Reisner } 1232b411b363SPhilipp Reisner 1233b411b363SPhilipp Reisner int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1234b411b363SPhilipp Reisner { 1235b411b363SPhilipp Reisner if (cancel) 1236b411b363SPhilipp Reisner return 1; 1237b411b363SPhilipp Reisner return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); 1238b411b363SPhilipp Reisner } 1239b411b363SPhilipp Reisner 124073a01a18SPhilipp Reisner int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 124173a01a18SPhilipp Reisner { 124273a01a18SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 124373a01a18SPhilipp Reisner int ok; 124473a01a18SPhilipp Reisner 124573a01a18SPhilipp Reisner if (unlikely(cancel)) { 12468554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 124773a01a18SPhilipp Reisner return 1; 124873a01a18SPhilipp Reisner } 124973a01a18SPhilipp Reisner 125073a01a18SPhilipp Reisner ok = drbd_send_oos(mdev, req); 12518554df1cSAndreas Gruenbacher req_mod(req, OOS_HANDED_TO_NETWORK); 125273a01a18SPhilipp Reisner 125373a01a18SPhilipp Reisner return ok; 125473a01a18SPhilipp Reisner } 125573a01a18SPhilipp Reisner 1256b411b363SPhilipp Reisner /** 1257b411b363SPhilipp Reisner * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 1258b411b363SPhilipp Reisner * @mdev: DRBD device. 1259b411b363SPhilipp Reisner * @w: work object. 1260b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1261b411b363SPhilipp Reisner */ 1262b411b363SPhilipp Reisner int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1263b411b363SPhilipp Reisner { 1264b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 1265b411b363SPhilipp Reisner int ok; 1266b411b363SPhilipp Reisner 1267b411b363SPhilipp Reisner if (unlikely(cancel)) { 12688554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 1269b411b363SPhilipp Reisner return 1; 1270b411b363SPhilipp Reisner } 1271b411b363SPhilipp Reisner 1272b411b363SPhilipp Reisner ok = drbd_send_dblock(mdev, req); 12738554df1cSAndreas Gruenbacher req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); 1274b411b363SPhilipp Reisner 1275b411b363SPhilipp Reisner return ok; 1276b411b363SPhilipp Reisner } 1277b411b363SPhilipp Reisner 1278b411b363SPhilipp Reisner /** 1279b411b363SPhilipp Reisner * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet 1280b411b363SPhilipp Reisner * @mdev: DRBD device. 1281b411b363SPhilipp Reisner * @w: work object. 1282b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1283b411b363SPhilipp Reisner */ 1284b411b363SPhilipp Reisner int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1285b411b363SPhilipp Reisner { 1286b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 1287b411b363SPhilipp Reisner int ok; 1288b411b363SPhilipp Reisner 1289b411b363SPhilipp Reisner if (unlikely(cancel)) { 12908554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 1291b411b363SPhilipp Reisner return 1; 1292b411b363SPhilipp Reisner } 1293b411b363SPhilipp Reisner 1294ace652acSAndreas Gruenbacher ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, 1295b411b363SPhilipp Reisner (unsigned long)req); 1296b411b363SPhilipp Reisner 1297b411b363SPhilipp Reisner if (!ok) { 1298b411b363SPhilipp Reisner /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send(); 1299b411b363SPhilipp Reisner * so this is probably redundant */ 1300b411b363SPhilipp Reisner if (mdev->state.conn >= C_CONNECTED) 1301b411b363SPhilipp Reisner drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); 1302b411b363SPhilipp Reisner } 13038554df1cSAndreas Gruenbacher req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); 1304b411b363SPhilipp Reisner 1305b411b363SPhilipp Reisner return ok; 1306b411b363SPhilipp Reisner } 1307b411b363SPhilipp Reisner 1308265be2d0SPhilipp Reisner int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1309265be2d0SPhilipp Reisner { 1310265be2d0SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 1311265be2d0SPhilipp Reisner 13120778286aSPhilipp Reisner if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 1313ace652acSAndreas Gruenbacher drbd_al_begin_io(mdev, req->i.sector); 1314265be2d0SPhilipp Reisner /* Calling drbd_al_begin_io() out of the worker might deadlocks 1315265be2d0SPhilipp Reisner theoretically. Practically it can not deadlock, since this is 1316265be2d0SPhilipp Reisner only used when unfreezing IOs. All the extents of the requests 1317265be2d0SPhilipp Reisner that made it into the TL are already active */ 1318265be2d0SPhilipp Reisner 1319265be2d0SPhilipp Reisner drbd_req_make_private_bio(req, req->master_bio); 1320265be2d0SPhilipp Reisner req->private_bio->bi_bdev = mdev->ldev->backing_bdev; 1321265be2d0SPhilipp Reisner generic_make_request(req->private_bio); 1322265be2d0SPhilipp Reisner 1323265be2d0SPhilipp Reisner return 1; 1324265be2d0SPhilipp Reisner } 1325265be2d0SPhilipp Reisner 1326b411b363SPhilipp Reisner static int _drbd_may_sync_now(struct drbd_conf *mdev) 1327b411b363SPhilipp Reisner { 1328b411b363SPhilipp Reisner struct drbd_conf *odev = mdev; 1329b411b363SPhilipp Reisner 1330b411b363SPhilipp Reisner while (1) { 1331b411b363SPhilipp Reisner if (odev->sync_conf.after == -1) 1332b411b363SPhilipp Reisner return 1; 1333b411b363SPhilipp Reisner odev = minor_to_mdev(odev->sync_conf.after); 1334841ce241SAndreas Gruenbacher if (!expect(odev)) 1335841ce241SAndreas Gruenbacher return 1; 1336b411b363SPhilipp Reisner if ((odev->state.conn >= C_SYNC_SOURCE && 1337b411b363SPhilipp Reisner odev->state.conn <= C_PAUSED_SYNC_T) || 1338b411b363SPhilipp Reisner odev->state.aftr_isp || odev->state.peer_isp || 1339b411b363SPhilipp Reisner odev->state.user_isp) 1340b411b363SPhilipp Reisner return 0; 1341b411b363SPhilipp Reisner } 1342b411b363SPhilipp Reisner } 1343b411b363SPhilipp Reisner 1344b411b363SPhilipp Reisner /** 1345b411b363SPhilipp Reisner * _drbd_pause_after() - Pause resync on all devices that may not resync now 1346b411b363SPhilipp Reisner * @mdev: DRBD device. 1347b411b363SPhilipp Reisner * 1348b411b363SPhilipp Reisner * Called from process context only (admin command and after_state_ch). 1349b411b363SPhilipp Reisner */ 1350b411b363SPhilipp Reisner static int _drbd_pause_after(struct drbd_conf *mdev) 1351b411b363SPhilipp Reisner { 1352b411b363SPhilipp Reisner struct drbd_conf *odev; 1353b411b363SPhilipp Reisner int i, rv = 0; 1354b411b363SPhilipp Reisner 1355b411b363SPhilipp Reisner for (i = 0; i < minor_count; i++) { 1356b411b363SPhilipp Reisner odev = minor_to_mdev(i); 1357b411b363SPhilipp Reisner if (!odev) 1358b411b363SPhilipp Reisner continue; 1359b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1360b411b363SPhilipp Reisner continue; 1361b411b363SPhilipp Reisner if (!_drbd_may_sync_now(odev)) 1362b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) 1363b411b363SPhilipp Reisner != SS_NOTHING_TO_DO); 1364b411b363SPhilipp Reisner } 1365b411b363SPhilipp Reisner 1366b411b363SPhilipp Reisner return rv; 1367b411b363SPhilipp Reisner } 1368b411b363SPhilipp Reisner 1369b411b363SPhilipp Reisner /** 1370b411b363SPhilipp Reisner * _drbd_resume_next() - Resume resync on all devices that may resync now 1371b411b363SPhilipp Reisner * @mdev: DRBD device. 1372b411b363SPhilipp Reisner * 1373b411b363SPhilipp Reisner * Called from process context only (admin command and worker). 1374b411b363SPhilipp Reisner */ 1375b411b363SPhilipp Reisner static int _drbd_resume_next(struct drbd_conf *mdev) 1376b411b363SPhilipp Reisner { 1377b411b363SPhilipp Reisner struct drbd_conf *odev; 1378b411b363SPhilipp Reisner int i, rv = 0; 1379b411b363SPhilipp Reisner 1380b411b363SPhilipp Reisner for (i = 0; i < minor_count; i++) { 1381b411b363SPhilipp Reisner odev = minor_to_mdev(i); 1382b411b363SPhilipp Reisner if (!odev) 1383b411b363SPhilipp Reisner continue; 1384b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1385b411b363SPhilipp Reisner continue; 1386b411b363SPhilipp Reisner if (odev->state.aftr_isp) { 1387b411b363SPhilipp Reisner if (_drbd_may_sync_now(odev)) 1388b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), 1389b411b363SPhilipp Reisner CS_HARD, NULL) 1390b411b363SPhilipp Reisner != SS_NOTHING_TO_DO) ; 1391b411b363SPhilipp Reisner } 1392b411b363SPhilipp Reisner } 1393b411b363SPhilipp Reisner return rv; 1394b411b363SPhilipp Reisner } 1395b411b363SPhilipp Reisner 1396b411b363SPhilipp Reisner void resume_next_sg(struct drbd_conf *mdev) 1397b411b363SPhilipp Reisner { 1398b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1399b411b363SPhilipp Reisner _drbd_resume_next(mdev); 1400b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1401b411b363SPhilipp Reisner } 1402b411b363SPhilipp Reisner 1403b411b363SPhilipp Reisner void suspend_other_sg(struct drbd_conf *mdev) 1404b411b363SPhilipp Reisner { 1405b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1406b411b363SPhilipp Reisner _drbd_pause_after(mdev); 1407b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1408b411b363SPhilipp Reisner } 1409b411b363SPhilipp Reisner 1410b411b363SPhilipp Reisner static int sync_after_error(struct drbd_conf *mdev, int o_minor) 1411b411b363SPhilipp Reisner { 1412b411b363SPhilipp Reisner struct drbd_conf *odev; 1413b411b363SPhilipp Reisner 1414b411b363SPhilipp Reisner if (o_minor == -1) 1415b411b363SPhilipp Reisner return NO_ERROR; 1416b411b363SPhilipp Reisner if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) 1417b411b363SPhilipp Reisner return ERR_SYNC_AFTER; 1418b411b363SPhilipp Reisner 1419b411b363SPhilipp Reisner /* check for loops */ 1420b411b363SPhilipp Reisner odev = minor_to_mdev(o_minor); 1421b411b363SPhilipp Reisner while (1) { 1422b411b363SPhilipp Reisner if (odev == mdev) 1423b411b363SPhilipp Reisner return ERR_SYNC_AFTER_CYCLE; 1424b411b363SPhilipp Reisner 1425b411b363SPhilipp Reisner /* dependency chain ends here, no cycles. */ 1426b411b363SPhilipp Reisner if (odev->sync_conf.after == -1) 1427b411b363SPhilipp Reisner return NO_ERROR; 1428b411b363SPhilipp Reisner 1429b411b363SPhilipp Reisner /* follow the dependency chain */ 1430b411b363SPhilipp Reisner odev = minor_to_mdev(odev->sync_conf.after); 1431b411b363SPhilipp Reisner } 1432b411b363SPhilipp Reisner } 1433b411b363SPhilipp Reisner 1434b411b363SPhilipp Reisner int drbd_alter_sa(struct drbd_conf *mdev, int na) 1435b411b363SPhilipp Reisner { 1436b411b363SPhilipp Reisner int changes; 1437b411b363SPhilipp Reisner int retcode; 1438b411b363SPhilipp Reisner 1439b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1440b411b363SPhilipp Reisner retcode = sync_after_error(mdev, na); 1441b411b363SPhilipp Reisner if (retcode == NO_ERROR) { 1442b411b363SPhilipp Reisner mdev->sync_conf.after = na; 1443b411b363SPhilipp Reisner do { 1444b411b363SPhilipp Reisner changes = _drbd_pause_after(mdev); 1445b411b363SPhilipp Reisner changes |= _drbd_resume_next(mdev); 1446b411b363SPhilipp Reisner } while (changes); 1447b411b363SPhilipp Reisner } 1448b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1449b411b363SPhilipp Reisner return retcode; 1450b411b363SPhilipp Reisner } 1451b411b363SPhilipp Reisner 14529bd28d3cSLars Ellenberg void drbd_rs_controller_reset(struct drbd_conf *mdev) 14539bd28d3cSLars Ellenberg { 14549bd28d3cSLars Ellenberg atomic_set(&mdev->rs_sect_in, 0); 14559bd28d3cSLars Ellenberg atomic_set(&mdev->rs_sect_ev, 0); 14569bd28d3cSLars Ellenberg mdev->rs_in_flight = 0; 14579bd28d3cSLars Ellenberg mdev->rs_planed = 0; 14589bd28d3cSLars Ellenberg spin_lock(&mdev->peer_seq_lock); 14599bd28d3cSLars Ellenberg fifo_set(&mdev->rs_plan_s, 0); 14609bd28d3cSLars Ellenberg spin_unlock(&mdev->peer_seq_lock); 14619bd28d3cSLars Ellenberg } 14629bd28d3cSLars Ellenberg 1463b411b363SPhilipp Reisner /** 1464b411b363SPhilipp Reisner * drbd_start_resync() - Start the resync process 1465b411b363SPhilipp Reisner * @mdev: DRBD device. 1466b411b363SPhilipp Reisner * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET 1467b411b363SPhilipp Reisner * 1468b411b363SPhilipp Reisner * This function might bring you directly into one of the 1469b411b363SPhilipp Reisner * C_PAUSED_SYNC_* states. 1470b411b363SPhilipp Reisner */ 1471b411b363SPhilipp Reisner void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) 1472b411b363SPhilipp Reisner { 1473b411b363SPhilipp Reisner union drbd_state ns; 1474b411b363SPhilipp Reisner int r; 1475b411b363SPhilipp Reisner 1476c4752ef1SPhilipp Reisner if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { 1477b411b363SPhilipp Reisner dev_err(DEV, "Resync already running!\n"); 1478b411b363SPhilipp Reisner return; 1479b411b363SPhilipp Reisner } 1480b411b363SPhilipp Reisner 148159817f4fSPhilipp Reisner if (mdev->state.conn < C_AHEAD) { 1482b411b363SPhilipp Reisner /* In case a previous resync run was aborted by an IO error/detach on the peer. */ 1483b411b363SPhilipp Reisner drbd_rs_cancel_all(mdev); 148459817f4fSPhilipp Reisner /* This should be done when we abort the resync. We definitely do not 148559817f4fSPhilipp Reisner want to have this for connections going back and forth between 148659817f4fSPhilipp Reisner Ahead/Behind and SyncSource/SyncTarget */ 148759817f4fSPhilipp Reisner } 1488b411b363SPhilipp Reisner 1489b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) { 1490b411b363SPhilipp Reisner /* Since application IO was locked out during C_WF_BITMAP_T and 1491b411b363SPhilipp Reisner C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 1492b411b363SPhilipp Reisner we check that we might make the data inconsistent. */ 1493b411b363SPhilipp Reisner r = drbd_khelper(mdev, "before-resync-target"); 1494b411b363SPhilipp Reisner r = (r >> 8) & 0xff; 1495b411b363SPhilipp Reisner if (r > 0) { 1496b411b363SPhilipp Reisner dev_info(DEV, "before-resync-target handler returned %d, " 1497b411b363SPhilipp Reisner "dropping connection.\n", r); 1498b411b363SPhilipp Reisner drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 1499b411b363SPhilipp Reisner return; 1500b411b363SPhilipp Reisner } 150109b9e797SPhilipp Reisner } else /* C_SYNC_SOURCE */ { 150209b9e797SPhilipp Reisner r = drbd_khelper(mdev, "before-resync-source"); 150309b9e797SPhilipp Reisner r = (r >> 8) & 0xff; 150409b9e797SPhilipp Reisner if (r > 0) { 150509b9e797SPhilipp Reisner if (r == 3) { 150609b9e797SPhilipp Reisner dev_info(DEV, "before-resync-source handler returned %d, " 150709b9e797SPhilipp Reisner "ignoring. Old userland tools?", r); 150809b9e797SPhilipp Reisner } else { 150909b9e797SPhilipp Reisner dev_info(DEV, "before-resync-source handler returned %d, " 151009b9e797SPhilipp Reisner "dropping connection.\n", r); 151109b9e797SPhilipp Reisner drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 151209b9e797SPhilipp Reisner return; 151309b9e797SPhilipp Reisner } 151409b9e797SPhilipp Reisner } 1515b411b363SPhilipp Reisner } 1516b411b363SPhilipp Reisner 1517b411b363SPhilipp Reisner drbd_state_lock(mdev); 1518b411b363SPhilipp Reisner 1519b411b363SPhilipp Reisner if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { 1520b411b363SPhilipp Reisner drbd_state_unlock(mdev); 1521b411b363SPhilipp Reisner return; 1522b411b363SPhilipp Reisner } 1523b411b363SPhilipp Reisner 1524b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1525b411b363SPhilipp Reisner ns = mdev->state; 1526b411b363SPhilipp Reisner 1527b411b363SPhilipp Reisner ns.aftr_isp = !_drbd_may_sync_now(mdev); 1528b411b363SPhilipp Reisner 1529b411b363SPhilipp Reisner ns.conn = side; 1530b411b363SPhilipp Reisner 1531b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) 1532b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 1533b411b363SPhilipp Reisner else /* side == C_SYNC_SOURCE */ 1534b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 1535b411b363SPhilipp Reisner 1536b411b363SPhilipp Reisner r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); 1537b411b363SPhilipp Reisner ns = mdev->state; 1538b411b363SPhilipp Reisner 1539b411b363SPhilipp Reisner if (ns.conn < C_CONNECTED) 1540b411b363SPhilipp Reisner r = SS_UNKNOWN_ERROR; 1541b411b363SPhilipp Reisner 1542b411b363SPhilipp Reisner if (r == SS_SUCCESS) { 15431d7734a0SLars Ellenberg unsigned long tw = drbd_bm_total_weight(mdev); 15441d7734a0SLars Ellenberg unsigned long now = jiffies; 15451d7734a0SLars Ellenberg int i; 15461d7734a0SLars Ellenberg 1547b411b363SPhilipp Reisner mdev->rs_failed = 0; 1548b411b363SPhilipp Reisner mdev->rs_paused = 0; 1549b411b363SPhilipp Reisner mdev->rs_same_csum = 0; 15500f0601f4SLars Ellenberg mdev->rs_last_events = 0; 15510f0601f4SLars Ellenberg mdev->rs_last_sect_ev = 0; 15521d7734a0SLars Ellenberg mdev->rs_total = tw; 15531d7734a0SLars Ellenberg mdev->rs_start = now; 15541d7734a0SLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 15551d7734a0SLars Ellenberg mdev->rs_mark_left[i] = tw; 15561d7734a0SLars Ellenberg mdev->rs_mark_time[i] = now; 15571d7734a0SLars Ellenberg } 1558b411b363SPhilipp Reisner _drbd_pause_after(mdev); 1559b411b363SPhilipp Reisner } 1560b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 15615a22db89SLars Ellenberg 15626c922ed5SLars Ellenberg if (r == SS_SUCCESS) { 15636c922ed5SLars Ellenberg dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 15646c922ed5SLars Ellenberg drbd_conn_str(ns.conn), 15656c922ed5SLars Ellenberg (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), 15666c922ed5SLars Ellenberg (unsigned long) mdev->rs_total); 15675a22db89SLars Ellenberg if (side == C_SYNC_TARGET) 15685a22db89SLars Ellenberg mdev->bm_resync_fo = 0; 15695a22db89SLars Ellenberg 15705a22db89SLars Ellenberg /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 15715a22db89SLars Ellenberg * with w_send_oos, or the sync target will get confused as to 15725a22db89SLars Ellenberg * how much bits to resync. We cannot do that always, because for an 15735a22db89SLars Ellenberg * empty resync and protocol < 95, we need to do it here, as we call 15745a22db89SLars Ellenberg * drbd_resync_finished from here in that case. 15755a22db89SLars Ellenberg * We drbd_gen_and_send_sync_uuid here for protocol < 96, 15765a22db89SLars Ellenberg * and from after_state_ch otherwise. */ 157731890f4aSPhilipp Reisner if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96) 15785a22db89SLars Ellenberg drbd_gen_and_send_sync_uuid(mdev); 1579b411b363SPhilipp Reisner 158031890f4aSPhilipp Reisner if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) { 1581af85e8e8SLars Ellenberg /* This still has a race (about when exactly the peers 1582af85e8e8SLars Ellenberg * detect connection loss) that can lead to a full sync 1583af85e8e8SLars Ellenberg * on next handshake. In 8.3.9 we fixed this with explicit 1584af85e8e8SLars Ellenberg * resync-finished notifications, but the fix 1585af85e8e8SLars Ellenberg * introduces a protocol change. Sleeping for some 1586af85e8e8SLars Ellenberg * time longer than the ping interval + timeout on the 1587af85e8e8SLars Ellenberg * SyncSource, to give the SyncTarget the chance to 1588af85e8e8SLars Ellenberg * detect connection loss, then waiting for a ping 1589af85e8e8SLars Ellenberg * response (implicit in drbd_resync_finished) reduces 1590af85e8e8SLars Ellenberg * the race considerably, but does not solve it. */ 1591af85e8e8SLars Ellenberg if (side == C_SYNC_SOURCE) 1592af85e8e8SLars Ellenberg schedule_timeout_interruptible( 159389e58e75SPhilipp Reisner mdev->tconn->net_conf->ping_int * HZ + 159489e58e75SPhilipp Reisner mdev->tconn->net_conf->ping_timeo*HZ/9); 1595b411b363SPhilipp Reisner drbd_resync_finished(mdev); 1596b411b363SPhilipp Reisner } 1597b411b363SPhilipp Reisner 15989bd28d3cSLars Ellenberg drbd_rs_controller_reset(mdev); 1599b411b363SPhilipp Reisner /* ns.conn may already be != mdev->state.conn, 1600b411b363SPhilipp Reisner * we may have been paused in between, or become paused until 1601b411b363SPhilipp Reisner * the timer triggers. 1602b411b363SPhilipp Reisner * No matter, that is handled in resync_timer_fn() */ 1603b411b363SPhilipp Reisner if (ns.conn == C_SYNC_TARGET) 1604b411b363SPhilipp Reisner mod_timer(&mdev->resync_timer, jiffies); 1605b411b363SPhilipp Reisner 1606b411b363SPhilipp Reisner drbd_md_sync(mdev); 1607b411b363SPhilipp Reisner } 16085a22db89SLars Ellenberg put_ldev(mdev); 1609d0c3f60fSPhilipp Reisner drbd_state_unlock(mdev); 1610b411b363SPhilipp Reisner } 1611b411b363SPhilipp Reisner 1612b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi) 1613b411b363SPhilipp Reisner { 1614b411b363SPhilipp Reisner struct drbd_conf *mdev = thi->mdev; 1615b411b363SPhilipp Reisner struct drbd_work *w = NULL; 1616b411b363SPhilipp Reisner LIST_HEAD(work_list); 1617b411b363SPhilipp Reisner int intr = 0, i; 1618b411b363SPhilipp Reisner 1619b411b363SPhilipp Reisner sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); 1620b411b363SPhilipp Reisner 1621e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 1622b411b363SPhilipp Reisner drbd_thread_current_set_cpu(mdev); 1623b411b363SPhilipp Reisner 1624e42325a5SPhilipp Reisner if (down_trylock(&mdev->tconn->data.work.s)) { 1625e42325a5SPhilipp Reisner mutex_lock(&mdev->tconn->data.mutex); 1626e42325a5SPhilipp Reisner if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) 1627e42325a5SPhilipp Reisner drbd_tcp_uncork(mdev->tconn->data.socket); 1628e42325a5SPhilipp Reisner mutex_unlock(&mdev->tconn->data.mutex); 1629b411b363SPhilipp Reisner 1630e42325a5SPhilipp Reisner intr = down_interruptible(&mdev->tconn->data.work.s); 1631b411b363SPhilipp Reisner 1632e42325a5SPhilipp Reisner mutex_lock(&mdev->tconn->data.mutex); 1633e42325a5SPhilipp Reisner if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) 1634e42325a5SPhilipp Reisner drbd_tcp_cork(mdev->tconn->data.socket); 1635e42325a5SPhilipp Reisner mutex_unlock(&mdev->tconn->data.mutex); 1636b411b363SPhilipp Reisner } 1637b411b363SPhilipp Reisner 1638b411b363SPhilipp Reisner if (intr) { 1639b411b363SPhilipp Reisner D_ASSERT(intr == -EINTR); 1640b411b363SPhilipp Reisner flush_signals(current); 1641841ce241SAndreas Gruenbacher if (!expect(get_t_state(thi) != RUNNING)) 1642b411b363SPhilipp Reisner continue; 1643b411b363SPhilipp Reisner break; 1644b411b363SPhilipp Reisner } 1645b411b363SPhilipp Reisner 1646e77a0a5cSAndreas Gruenbacher if (get_t_state(thi) != RUNNING) 1647b411b363SPhilipp Reisner break; 1648b411b363SPhilipp Reisner /* With this break, we have done a down() but not consumed 1649b411b363SPhilipp Reisner the entry from the list. The cleanup code takes care of 1650b411b363SPhilipp Reisner this... */ 1651b411b363SPhilipp Reisner 1652b411b363SPhilipp Reisner w = NULL; 1653e42325a5SPhilipp Reisner spin_lock_irq(&mdev->tconn->data.work.q_lock); 1654e42325a5SPhilipp Reisner if (!expect(!list_empty(&mdev->tconn->data.work.q))) { 1655b411b363SPhilipp Reisner /* something terribly wrong in our logic. 1656b411b363SPhilipp Reisner * we were able to down() the semaphore, 1657b411b363SPhilipp Reisner * but the list is empty... doh. 1658b411b363SPhilipp Reisner * 1659b411b363SPhilipp Reisner * what is the best thing to do now? 1660b411b363SPhilipp Reisner * try again from scratch, restarting the receiver, 1661b411b363SPhilipp Reisner * asender, whatnot? could break even more ugly, 1662b411b363SPhilipp Reisner * e.g. when we are primary, but no good local data. 1663b411b363SPhilipp Reisner * 1664b411b363SPhilipp Reisner * I'll try to get away just starting over this loop. 1665b411b363SPhilipp Reisner */ 1666e42325a5SPhilipp Reisner spin_unlock_irq(&mdev->tconn->data.work.q_lock); 1667b411b363SPhilipp Reisner continue; 1668b411b363SPhilipp Reisner } 1669e42325a5SPhilipp Reisner w = list_entry(mdev->tconn->data.work.q.next, struct drbd_work, list); 1670b411b363SPhilipp Reisner list_del_init(&w->list); 1671e42325a5SPhilipp Reisner spin_unlock_irq(&mdev->tconn->data.work.q_lock); 1672b411b363SPhilipp Reisner 1673b411b363SPhilipp Reisner if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { 1674b411b363SPhilipp Reisner /* dev_warn(DEV, "worker: a callback failed! \n"); */ 1675b411b363SPhilipp Reisner if (mdev->state.conn >= C_CONNECTED) 1676b411b363SPhilipp Reisner drbd_force_state(mdev, 1677b411b363SPhilipp Reisner NS(conn, C_NETWORK_FAILURE)); 1678b411b363SPhilipp Reisner } 1679b411b363SPhilipp Reisner } 1680b411b363SPhilipp Reisner D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); 1681b411b363SPhilipp Reisner D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); 1682b411b363SPhilipp Reisner 1683e42325a5SPhilipp Reisner spin_lock_irq(&mdev->tconn->data.work.q_lock); 1684b411b363SPhilipp Reisner i = 0; 1685e42325a5SPhilipp Reisner while (!list_empty(&mdev->tconn->data.work.q)) { 1686e42325a5SPhilipp Reisner list_splice_init(&mdev->tconn->data.work.q, &work_list); 1687e42325a5SPhilipp Reisner spin_unlock_irq(&mdev->tconn->data.work.q_lock); 1688b411b363SPhilipp Reisner 1689b411b363SPhilipp Reisner while (!list_empty(&work_list)) { 1690b411b363SPhilipp Reisner w = list_entry(work_list.next, struct drbd_work, list); 1691b411b363SPhilipp Reisner list_del_init(&w->list); 1692b411b363SPhilipp Reisner w->cb(mdev, w, 1); 1693b411b363SPhilipp Reisner i++; /* dead debugging code */ 1694b411b363SPhilipp Reisner } 1695b411b363SPhilipp Reisner 1696e42325a5SPhilipp Reisner spin_lock_irq(&mdev->tconn->data.work.q_lock); 1697b411b363SPhilipp Reisner } 1698e42325a5SPhilipp Reisner sema_init(&mdev->tconn->data.work.s, 0); 1699b411b363SPhilipp Reisner /* DANGEROUS race: if someone did queue his work within the spinlock, 1700b411b363SPhilipp Reisner * but up() ed outside the spinlock, we could get an up() on the 1701b411b363SPhilipp Reisner * semaphore without corresponding list entry. 1702b411b363SPhilipp Reisner * So don't do that. 1703b411b363SPhilipp Reisner */ 1704e42325a5SPhilipp Reisner spin_unlock_irq(&mdev->tconn->data.work.q_lock); 1705b411b363SPhilipp Reisner 1706b411b363SPhilipp Reisner D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); 1707b411b363SPhilipp Reisner /* _drbd_set_state only uses stop_nowait. 1708e6b3ea83SPhilipp Reisner * wait here for the exiting receiver. */ 1709e6b3ea83SPhilipp Reisner drbd_thread_stop(&mdev->tconn->receiver); 1710b411b363SPhilipp Reisner drbd_mdev_cleanup(mdev); 1711b411b363SPhilipp Reisner 1712b411b363SPhilipp Reisner dev_info(DEV, "worker terminated\n"); 1713b411b363SPhilipp Reisner 1714b411b363SPhilipp Reisner clear_bit(DEVICE_DYING, &mdev->flags); 1715b411b363SPhilipp Reisner clear_bit(CONFIG_PENDING, &mdev->flags); 1716b411b363SPhilipp Reisner wake_up(&mdev->state_wait); 1717b411b363SPhilipp Reisner 1718b411b363SPhilipp Reisner return 0; 1719b411b363SPhilipp Reisner } 1720