1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_worker.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23b411b363SPhilipp Reisner 24b411b363SPhilipp Reisner */ 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner #include <linux/drbd.h> 28b411b363SPhilipp Reisner #include <linux/sched.h> 29b411b363SPhilipp Reisner #include <linux/wait.h> 30b411b363SPhilipp Reisner #include <linux/mm.h> 31b411b363SPhilipp Reisner #include <linux/memcontrol.h> 32b411b363SPhilipp Reisner #include <linux/mm_inline.h> 33b411b363SPhilipp Reisner #include <linux/slab.h> 34b411b363SPhilipp Reisner #include <linux/random.h> 35b411b363SPhilipp Reisner #include <linux/string.h> 36b411b363SPhilipp Reisner #include <linux/scatterlist.h> 37b411b363SPhilipp Reisner 38b411b363SPhilipp Reisner #include "drbd_int.h" 39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 40b411b363SPhilipp Reisner #include "drbd_req.h" 41b411b363SPhilipp Reisner 42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int); 43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int); 44b411b363SPhilipp Reisner 45c5a91619SAndreas Gruenbacher /* endio handlers: 46c5a91619SAndreas Gruenbacher * drbd_md_io_complete (defined here) 47fcefa62eSAndreas Gruenbacher * drbd_request_endio (defined here) 48fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio (defined here) 49c5a91619SAndreas Gruenbacher * bm_async_io_complete (defined in drbd_bitmap.c) 50c5a91619SAndreas Gruenbacher * 51b411b363SPhilipp Reisner * For all these callbacks, note the following: 52b411b363SPhilipp Reisner * The callbacks will be called in irq context by the IDE drivers, 53b411b363SPhilipp Reisner * and in Softirqs/Tasklets/BH context by the SCSI drivers. 54b411b363SPhilipp Reisner * Try to get the locking right :) 55b411b363SPhilipp Reisner * 56b411b363SPhilipp Reisner */ 57b411b363SPhilipp Reisner 58b411b363SPhilipp Reisner 59b411b363SPhilipp Reisner /* About the global_state_lock 60b411b363SPhilipp Reisner Each state transition on an device holds a read lock. In case we have 6195f8efd0SAndreas Gruenbacher to evaluate the resync after dependencies, we grab a write lock, because 62b411b363SPhilipp Reisner we need stable states on all devices for that. */ 63b411b363SPhilipp Reisner rwlock_t global_state_lock; 64b411b363SPhilipp Reisner 65b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO 66b411b363SPhilipp Reisner * submitted by drbd_md_sync_page_io() 67b411b363SPhilipp Reisner */ 68b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error) 69b411b363SPhilipp Reisner { 70b411b363SPhilipp Reisner struct drbd_md_io *md_io; 71b30ab791SAndreas Gruenbacher struct drbd_device *device; 72b411b363SPhilipp Reisner 73b411b363SPhilipp Reisner md_io = (struct drbd_md_io *)bio->bi_private; 74b30ab791SAndreas Gruenbacher device = container_of(md_io, struct drbd_device, md_io); 75cdfda633SPhilipp Reisner 76b411b363SPhilipp Reisner md_io->error = error; 77b411b363SPhilipp Reisner 780cfac5ddSPhilipp Reisner /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 790cfac5ddSPhilipp Reisner * to timeout on the lower level device, and eventually detach from it. 800cfac5ddSPhilipp Reisner * If this io completion runs after that timeout expired, this 810cfac5ddSPhilipp Reisner * drbd_md_put_buffer() may allow us to finally try and re-attach. 820cfac5ddSPhilipp Reisner * During normal operation, this only puts that extra reference 830cfac5ddSPhilipp Reisner * down to 1 again. 840cfac5ddSPhilipp Reisner * Make sure we first drop the reference, and only then signal 850cfac5ddSPhilipp Reisner * completion, or we may (in drbd_al_read_log()) cycle so fast into the 860cfac5ddSPhilipp Reisner * next drbd_md_sync_page_io(), that we trigger the 87b30ab791SAndreas Gruenbacher * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 880cfac5ddSPhilipp Reisner */ 89b30ab791SAndreas Gruenbacher drbd_md_put_buffer(device); 90cdfda633SPhilipp Reisner md_io->done = 1; 91b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 92cdfda633SPhilipp Reisner bio_put(bio); 93b30ab791SAndreas Gruenbacher if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ 94b30ab791SAndreas Gruenbacher put_ldev(device); 95b411b363SPhilipp Reisner } 96b411b363SPhilipp Reisner 97b411b363SPhilipp Reisner /* reads on behalf of the partner, 98b411b363SPhilipp Reisner * "submitted" by the receiver 99b411b363SPhilipp Reisner */ 100a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) 101b411b363SPhilipp Reisner { 102b411b363SPhilipp Reisner unsigned long flags = 0; 1036780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1046780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 105b411b363SPhilipp Reisner 1060500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 107b30ab791SAndreas Gruenbacher device->read_cnt += peer_req->i.size >> 9; 108a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 109b30ab791SAndreas Gruenbacher if (list_empty(&device->read_ee)) 110b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 111db830c46SAndreas Gruenbacher if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) 112b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_READ_ERROR); 1130500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 114b411b363SPhilipp Reisner 1156780139cSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); 116b30ab791SAndreas Gruenbacher put_ldev(device); 117b411b363SPhilipp Reisner } 118b411b363SPhilipp Reisner 119b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes, 12045bb912bSLars Ellenberg * "submitted" by the receiver, final stage. */ 121a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) 122b411b363SPhilipp Reisner { 123b411b363SPhilipp Reisner unsigned long flags = 0; 1246780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1256780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 126181286adSLars Ellenberg struct drbd_interval i; 127b411b363SPhilipp Reisner int do_wake; 128579b57edSAndreas Gruenbacher u64 block_id; 129b411b363SPhilipp Reisner int do_al_complete_io; 130b411b363SPhilipp Reisner 131db830c46SAndreas Gruenbacher /* after we moved peer_req to done_ee, 132b411b363SPhilipp Reisner * we may no longer access it, 133b411b363SPhilipp Reisner * it may be freed/reused already! 134b411b363SPhilipp Reisner * (as soon as we release the req_lock) */ 135181286adSLars Ellenberg i = peer_req->i; 136db830c46SAndreas Gruenbacher do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 137db830c46SAndreas Gruenbacher block_id = peer_req->block_id; 138b411b363SPhilipp Reisner 1390500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 140b30ab791SAndreas Gruenbacher device->writ_cnt += peer_req->i.size >> 9; 141a8cd15baSAndreas Gruenbacher list_move_tail(&peer_req->w.list, &device->done_ee); 142b411b363SPhilipp Reisner 143bb3bfe96SAndreas Gruenbacher /* 1445e472264SAndreas Gruenbacher * Do not remove from the write_requests tree here: we did not send the 145bb3bfe96SAndreas Gruenbacher * Ack yet and did not wake possibly waiting conflicting requests. 146bb3bfe96SAndreas Gruenbacher * Removed from the tree from "drbd_process_done_ee" within the 14784b8c06bSAndreas Gruenbacher * appropriate dw.cb (e_end_block/e_end_resync_block) or from 148bb3bfe96SAndreas Gruenbacher * _drbd_clear_done_ee. 149bb3bfe96SAndreas Gruenbacher */ 150b411b363SPhilipp Reisner 151b30ab791SAndreas Gruenbacher do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 152b411b363SPhilipp Reisner 153a0fb3c47SLars Ellenberg /* FIXME do we want to detach for failed REQ_DISCARD? 154a0fb3c47SLars Ellenberg * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 155a0fb3c47SLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) 156b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 1570500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 158b411b363SPhilipp Reisner 159579b57edSAndreas Gruenbacher if (block_id == ID_SYNCER) 160b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, i.sector); 161b411b363SPhilipp Reisner 162b411b363SPhilipp Reisner if (do_wake) 163b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 164b411b363SPhilipp Reisner 165b411b363SPhilipp Reisner if (do_al_complete_io) 166b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &i); 167b411b363SPhilipp Reisner 1686780139cSAndreas Gruenbacher wake_asender(peer_device->connection); 169b30ab791SAndreas Gruenbacher put_ldev(device); 17045bb912bSLars Ellenberg } 171b411b363SPhilipp Reisner 17245bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes, 17345bb912bSLars Ellenberg * "submitted" by the receiver. 17445bb912bSLars Ellenberg */ 175fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error) 17645bb912bSLars Ellenberg { 177db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req = bio->bi_private; 178a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_req->peer_device->device; 17945bb912bSLars Ellenberg int uptodate = bio_flagged(bio, BIO_UPTODATE); 18045bb912bSLars Ellenberg int is_write = bio_data_dir(bio) == WRITE; 181a0fb3c47SLars Ellenberg int is_discard = !!(bio->bi_rw & REQ_DISCARD); 18245bb912bSLars Ellenberg 18307194272SLars Ellenberg if (error && __ratelimit(&drbd_ratelimit_state)) 184d0180171SAndreas Gruenbacher drbd_warn(device, "%s: error=%d s=%llus\n", 185a0fb3c47SLars Ellenberg is_write ? (is_discard ? "discard" : "write") 186a0fb3c47SLars Ellenberg : "read", error, 187db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 18845bb912bSLars Ellenberg if (!error && !uptodate) { 18907194272SLars Ellenberg if (__ratelimit(&drbd_ratelimit_state)) 190d0180171SAndreas Gruenbacher drbd_warn(device, "%s: setting error to -EIO s=%llus\n", 19145bb912bSLars Ellenberg is_write ? "write" : "read", 192db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 19345bb912bSLars Ellenberg /* strange behavior of some lower level drivers... 19445bb912bSLars Ellenberg * fail the request by clearing the uptodate flag, 19545bb912bSLars Ellenberg * but do not return any error?! */ 19645bb912bSLars Ellenberg error = -EIO; 19745bb912bSLars Ellenberg } 19845bb912bSLars Ellenberg 19945bb912bSLars Ellenberg if (error) 200db830c46SAndreas Gruenbacher set_bit(__EE_WAS_ERROR, &peer_req->flags); 20145bb912bSLars Ellenberg 20245bb912bSLars Ellenberg bio_put(bio); /* no need for the bio anymore */ 203db830c46SAndreas Gruenbacher if (atomic_dec_and_test(&peer_req->pending_bios)) { 20445bb912bSLars Ellenberg if (is_write) 205db830c46SAndreas Gruenbacher drbd_endio_write_sec_final(peer_req); 20645bb912bSLars Ellenberg else 207db830c46SAndreas Gruenbacher drbd_endio_read_sec_final(peer_req); 20845bb912bSLars Ellenberg } 209b411b363SPhilipp Reisner } 210b411b363SPhilipp Reisner 211b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request 212b411b363SPhilipp Reisner */ 213fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error) 214b411b363SPhilipp Reisner { 215a115413dSLars Ellenberg unsigned long flags; 216b411b363SPhilipp Reisner struct drbd_request *req = bio->bi_private; 21784b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 218a115413dSLars Ellenberg struct bio_and_error m; 219b411b363SPhilipp Reisner enum drbd_req_event what; 220b411b363SPhilipp Reisner int uptodate = bio_flagged(bio, BIO_UPTODATE); 221b411b363SPhilipp Reisner 222b411b363SPhilipp Reisner if (!error && !uptodate) { 223d0180171SAndreas Gruenbacher drbd_warn(device, "p %s: setting error to -EIO\n", 224b411b363SPhilipp Reisner bio_data_dir(bio) == WRITE ? "write" : "read"); 225b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 226b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 227b411b363SPhilipp Reisner * but do not return any error?! */ 228b411b363SPhilipp Reisner error = -EIO; 229b411b363SPhilipp Reisner } 230b411b363SPhilipp Reisner 2311b6dd252SPhilipp Reisner 2321b6dd252SPhilipp Reisner /* If this request was aborted locally before, 2331b6dd252SPhilipp Reisner * but now was completed "successfully", 2341b6dd252SPhilipp Reisner * chances are that this caused arbitrary data corruption. 2351b6dd252SPhilipp Reisner * 2361b6dd252SPhilipp Reisner * "aborting" requests, or force-detaching the disk, is intended for 2371b6dd252SPhilipp Reisner * completely blocked/hung local backing devices which do no longer 2381b6dd252SPhilipp Reisner * complete requests at all, not even do error completions. In this 2391b6dd252SPhilipp Reisner * situation, usually a hard-reset and failover is the only way out. 2401b6dd252SPhilipp Reisner * 2411b6dd252SPhilipp Reisner * By "aborting", basically faking a local error-completion, 2421b6dd252SPhilipp Reisner * we allow for a more graceful swichover by cleanly migrating services. 2431b6dd252SPhilipp Reisner * Still the affected node has to be rebooted "soon". 2441b6dd252SPhilipp Reisner * 2451b6dd252SPhilipp Reisner * By completing these requests, we allow the upper layers to re-use 2461b6dd252SPhilipp Reisner * the associated data pages. 2471b6dd252SPhilipp Reisner * 2481b6dd252SPhilipp Reisner * If later the local backing device "recovers", and now DMAs some data 2491b6dd252SPhilipp Reisner * from disk into the original request pages, in the best case it will 2501b6dd252SPhilipp Reisner * just put random data into unused pages; but typically it will corrupt 2511b6dd252SPhilipp Reisner * meanwhile completely unrelated data, causing all sorts of damage. 2521b6dd252SPhilipp Reisner * 2531b6dd252SPhilipp Reisner * Which means delayed successful completion, 2541b6dd252SPhilipp Reisner * especially for READ requests, 2551b6dd252SPhilipp Reisner * is a reason to panic(). 2561b6dd252SPhilipp Reisner * 2571b6dd252SPhilipp Reisner * We assume that a delayed *error* completion is OK, 2581b6dd252SPhilipp Reisner * though we still will complain noisily about it. 2591b6dd252SPhilipp Reisner */ 2601b6dd252SPhilipp Reisner if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { 2611b6dd252SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 262d0180171SAndreas Gruenbacher drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 2631b6dd252SPhilipp Reisner 2641b6dd252SPhilipp Reisner if (!error) 2651b6dd252SPhilipp Reisner panic("possible random memory corruption caused by delayed completion of aborted local request\n"); 2661b6dd252SPhilipp Reisner } 2671b6dd252SPhilipp Reisner 268b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 269b411b363SPhilipp Reisner if (unlikely(error)) { 2702f632aebSLars Ellenberg if (bio->bi_rw & REQ_DISCARD) 2712f632aebSLars Ellenberg what = (error == -EOPNOTSUPP) 2722f632aebSLars Ellenberg ? DISCARD_COMPLETED_NOTSUPP 2732f632aebSLars Ellenberg : DISCARD_COMPLETED_WITH_ERROR; 2742f632aebSLars Ellenberg else 275b411b363SPhilipp Reisner what = (bio_data_dir(bio) == WRITE) 2768554df1cSAndreas Gruenbacher ? WRITE_COMPLETED_WITH_ERROR 2775c3c7e64SLars Ellenberg : (bio_rw(bio) == READ) 2788554df1cSAndreas Gruenbacher ? READ_COMPLETED_WITH_ERROR 2798554df1cSAndreas Gruenbacher : READ_AHEAD_COMPLETED_WITH_ERROR; 280b411b363SPhilipp Reisner } else 2818554df1cSAndreas Gruenbacher what = COMPLETED_OK; 282b411b363SPhilipp Reisner 283b411b363SPhilipp Reisner bio_put(req->private_bio); 284b411b363SPhilipp Reisner req->private_bio = ERR_PTR(error); 285b411b363SPhilipp Reisner 286a115413dSLars Ellenberg /* not req_mod(), we need irqsave here! */ 2870500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 288a115413dSLars Ellenberg __req_mod(req, what, &m); 2890500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 290b30ab791SAndreas Gruenbacher put_ldev(device); 291a115413dSLars Ellenberg 292a115413dSLars Ellenberg if (m.bio) 293b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 294b411b363SPhilipp Reisner } 295b411b363SPhilipp Reisner 29679a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest) 29745bb912bSLars Ellenberg { 29845bb912bSLars Ellenberg struct hash_desc desc; 29945bb912bSLars Ellenberg struct scatterlist sg; 300db830c46SAndreas Gruenbacher struct page *page = peer_req->pages; 30145bb912bSLars Ellenberg struct page *tmp; 30245bb912bSLars Ellenberg unsigned len; 30345bb912bSLars Ellenberg 30445bb912bSLars Ellenberg desc.tfm = tfm; 30545bb912bSLars Ellenberg desc.flags = 0; 30645bb912bSLars Ellenberg 30745bb912bSLars Ellenberg sg_init_table(&sg, 1); 30845bb912bSLars Ellenberg crypto_hash_init(&desc); 30945bb912bSLars Ellenberg 31045bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) { 31145bb912bSLars Ellenberg /* all but the last page will be fully used */ 31245bb912bSLars Ellenberg sg_set_page(&sg, page, PAGE_SIZE, 0); 31345bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 31445bb912bSLars Ellenberg page = tmp; 31545bb912bSLars Ellenberg } 31645bb912bSLars Ellenberg /* and now the last, possibly only partially used page */ 317db830c46SAndreas Gruenbacher len = peer_req->i.size & (PAGE_SIZE - 1); 31845bb912bSLars Ellenberg sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); 31945bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 32045bb912bSLars Ellenberg crypto_hash_final(&desc, digest); 32145bb912bSLars Ellenberg } 32245bb912bSLars Ellenberg 32379a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest) 324b411b363SPhilipp Reisner { 325b411b363SPhilipp Reisner struct hash_desc desc; 326b411b363SPhilipp Reisner struct scatterlist sg; 3277988613bSKent Overstreet struct bio_vec bvec; 3287988613bSKent Overstreet struct bvec_iter iter; 329b411b363SPhilipp Reisner 330b411b363SPhilipp Reisner desc.tfm = tfm; 331b411b363SPhilipp Reisner desc.flags = 0; 332b411b363SPhilipp Reisner 333b411b363SPhilipp Reisner sg_init_table(&sg, 1); 334b411b363SPhilipp Reisner crypto_hash_init(&desc); 335b411b363SPhilipp Reisner 3367988613bSKent Overstreet bio_for_each_segment(bvec, bio, iter) { 3377988613bSKent Overstreet sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 338b411b363SPhilipp Reisner crypto_hash_update(&desc, &sg, sg.length); 339b411b363SPhilipp Reisner } 340b411b363SPhilipp Reisner crypto_hash_final(&desc, digest); 341b411b363SPhilipp Reisner } 342b411b363SPhilipp Reisner 3439676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */ 34499920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel) 345b411b363SPhilipp Reisner { 346a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 3476780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 3486780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 349b411b363SPhilipp Reisner int digest_size; 350b411b363SPhilipp Reisner void *digest; 35199920dc5SAndreas Gruenbacher int err = 0; 352b411b363SPhilipp Reisner 35353ea4331SLars Ellenberg if (unlikely(cancel)) 35453ea4331SLars Ellenberg goto out; 355b411b363SPhilipp Reisner 3569676c760SLars Ellenberg if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) 35753ea4331SLars Ellenberg goto out; 35853ea4331SLars Ellenberg 3596780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); 360b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 361b411b363SPhilipp Reisner if (digest) { 362db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 363db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 3646780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 3659676c760SLars Ellenberg /* Free peer_req and pages before send. 36653ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 36753ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 36853ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 369c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 370b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 371db830c46SAndreas Gruenbacher peer_req = NULL; 372b30ab791SAndreas Gruenbacher inc_rs_pending(device); 3736780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, 37453ea4331SLars Ellenberg digest, digest_size, 375b411b363SPhilipp Reisner P_CSUM_RS_REQUEST); 376b411b363SPhilipp Reisner kfree(digest); 377b411b363SPhilipp Reisner } else { 378d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc() of digest failed.\n"); 37999920dc5SAndreas Gruenbacher err = -ENOMEM; 380b411b363SPhilipp Reisner } 381b411b363SPhilipp Reisner 38253ea4331SLars Ellenberg out: 383db830c46SAndreas Gruenbacher if (peer_req) 384b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 385b411b363SPhilipp Reisner 38699920dc5SAndreas Gruenbacher if (unlikely(err)) 387d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); 38899920dc5SAndreas Gruenbacher return err; 389b411b363SPhilipp Reisner } 390b411b363SPhilipp Reisner 391b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 392b411b363SPhilipp Reisner 39369a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) 394b411b363SPhilipp Reisner { 39569a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 396db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 397b411b363SPhilipp Reisner 398b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 39980a40e43SLars Ellenberg return -EIO; 400b411b363SPhilipp Reisner 401b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector)) 4020f0601f4SLars Ellenberg goto defer; 4030f0601f4SLars Ellenberg 404b411b363SPhilipp Reisner /* GFP_TRY, because if there is no memory available right now, this may 405b411b363SPhilipp Reisner * be rescheduled for later. It is "only" background resync, after all. */ 40669a22773SAndreas Gruenbacher peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 407a0fb3c47SLars Ellenberg size, true /* has real payload */, GFP_TRY); 408db830c46SAndreas Gruenbacher if (!peer_req) 40980a40e43SLars Ellenberg goto defer; 410b411b363SPhilipp Reisner 411a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_send_csum; 4120500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 413a8cd15baSAndreas Gruenbacher list_add(&peer_req->w.list, &device->read_ee); 4140500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 415b411b363SPhilipp Reisner 416b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 417b30ab791SAndreas Gruenbacher if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0) 41880a40e43SLars Ellenberg return 0; 41945bb912bSLars Ellenberg 42010f6d992SLars Ellenberg /* If it failed because of ENOMEM, retry should help. If it failed 42110f6d992SLars Ellenberg * because bio_add_page failed (probably broken lower level driver), 42210f6d992SLars Ellenberg * retry may or may not help. 42310f6d992SLars Ellenberg * If it does not, you may need to force disconnect. */ 4240500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 425a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 4260500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 42722cc37a9SLars Ellenberg 428b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 42980a40e43SLars Ellenberg defer: 430b30ab791SAndreas Gruenbacher put_ldev(device); 43180a40e43SLars Ellenberg return -EAGAIN; 432b411b363SPhilipp Reisner } 433b411b363SPhilipp Reisner 43499920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel) 435794abb75SPhilipp Reisner { 43684b8c06bSAndreas Gruenbacher struct drbd_device *device = 43784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, resync_work); 43884b8c06bSAndreas Gruenbacher 439b30ab791SAndreas Gruenbacher switch (device->state.conn) { 440794abb75SPhilipp Reisner case C_VERIFY_S: 441d448a2e1SAndreas Gruenbacher make_ov_request(device, cancel); 442794abb75SPhilipp Reisner break; 443794abb75SPhilipp Reisner case C_SYNC_TARGET: 444d448a2e1SAndreas Gruenbacher make_resync_request(device, cancel); 445794abb75SPhilipp Reisner break; 446794abb75SPhilipp Reisner } 447794abb75SPhilipp Reisner 44899920dc5SAndreas Gruenbacher return 0; 449794abb75SPhilipp Reisner } 450794abb75SPhilipp Reisner 451b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data) 452b411b363SPhilipp Reisner { 453b30ab791SAndreas Gruenbacher struct drbd_device *device = (struct drbd_device *) data; 454b411b363SPhilipp Reisner 455b30ab791SAndreas Gruenbacher if (list_empty(&device->resync_work.list)) 45684b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 45784b8c06bSAndreas Gruenbacher &device->resync_work); 458b411b363SPhilipp Reisner } 459b411b363SPhilipp Reisner 460778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value) 461778f271dSPhilipp Reisner { 462778f271dSPhilipp Reisner int i; 463778f271dSPhilipp Reisner 464778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 465f10f2623SPhilipp Reisner fb->values[i] = value; 466778f271dSPhilipp Reisner } 467778f271dSPhilipp Reisner 468778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value) 469778f271dSPhilipp Reisner { 470778f271dSPhilipp Reisner int ov; 471778f271dSPhilipp Reisner 472778f271dSPhilipp Reisner ov = fb->values[fb->head_index]; 473778f271dSPhilipp Reisner fb->values[fb->head_index++] = value; 474778f271dSPhilipp Reisner 475778f271dSPhilipp Reisner if (fb->head_index >= fb->size) 476778f271dSPhilipp Reisner fb->head_index = 0; 477778f271dSPhilipp Reisner 478778f271dSPhilipp Reisner return ov; 479778f271dSPhilipp Reisner } 480778f271dSPhilipp Reisner 481778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value) 482778f271dSPhilipp Reisner { 483778f271dSPhilipp Reisner int i; 484778f271dSPhilipp Reisner 485778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 486778f271dSPhilipp Reisner fb->values[i] += value; 487778f271dSPhilipp Reisner } 488778f271dSPhilipp Reisner 4899958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size) 4909958c857SPhilipp Reisner { 4919958c857SPhilipp Reisner struct fifo_buffer *fb; 4929958c857SPhilipp Reisner 4938747d30aSLars Ellenberg fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); 4949958c857SPhilipp Reisner if (!fb) 4959958c857SPhilipp Reisner return NULL; 4969958c857SPhilipp Reisner 4979958c857SPhilipp Reisner fb->head_index = 0; 4989958c857SPhilipp Reisner fb->size = fifo_size; 4999958c857SPhilipp Reisner fb->total = 0; 5009958c857SPhilipp Reisner 5019958c857SPhilipp Reisner return fb; 5029958c857SPhilipp Reisner } 5039958c857SPhilipp Reisner 5040e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 505778f271dSPhilipp Reisner { 506daeda1ccSPhilipp Reisner struct disk_conf *dc; 507778f271dSPhilipp Reisner unsigned int want; /* The number of sectors we want in the proxy */ 508778f271dSPhilipp Reisner int req_sect; /* Number of sectors to request in this turn */ 509778f271dSPhilipp Reisner int correction; /* Number of sectors more we need in the proxy*/ 510778f271dSPhilipp Reisner int cps; /* correction per invocation of drbd_rs_controller() */ 511778f271dSPhilipp Reisner int steps; /* Number of time steps to plan ahead */ 512778f271dSPhilipp Reisner int curr_corr; 513778f271dSPhilipp Reisner int max_sect; 514813472ceSPhilipp Reisner struct fifo_buffer *plan; 515778f271dSPhilipp Reisner 516b30ab791SAndreas Gruenbacher dc = rcu_dereference(device->ldev->disk_conf); 517b30ab791SAndreas Gruenbacher plan = rcu_dereference(device->rs_plan_s); 518778f271dSPhilipp Reisner 519813472ceSPhilipp Reisner steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ 520778f271dSPhilipp Reisner 521b30ab791SAndreas Gruenbacher if (device->rs_in_flight + sect_in == 0) { /* At start of resync */ 522daeda1ccSPhilipp Reisner want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; 523778f271dSPhilipp Reisner } else { /* normal path */ 524daeda1ccSPhilipp Reisner want = dc->c_fill_target ? dc->c_fill_target : 525daeda1ccSPhilipp Reisner sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); 526778f271dSPhilipp Reisner } 527778f271dSPhilipp Reisner 528b30ab791SAndreas Gruenbacher correction = want - device->rs_in_flight - plan->total; 529778f271dSPhilipp Reisner 530778f271dSPhilipp Reisner /* Plan ahead */ 531778f271dSPhilipp Reisner cps = correction / steps; 532813472ceSPhilipp Reisner fifo_add_val(plan, cps); 533813472ceSPhilipp Reisner plan->total += cps * steps; 534778f271dSPhilipp Reisner 535778f271dSPhilipp Reisner /* What we do in this step */ 536813472ceSPhilipp Reisner curr_corr = fifo_push(plan, 0); 537813472ceSPhilipp Reisner plan->total -= curr_corr; 538778f271dSPhilipp Reisner 539778f271dSPhilipp Reisner req_sect = sect_in + curr_corr; 540778f271dSPhilipp Reisner if (req_sect < 0) 541778f271dSPhilipp Reisner req_sect = 0; 542778f271dSPhilipp Reisner 543daeda1ccSPhilipp Reisner max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ; 544778f271dSPhilipp Reisner if (req_sect > max_sect) 545778f271dSPhilipp Reisner req_sect = max_sect; 546778f271dSPhilipp Reisner 547778f271dSPhilipp Reisner /* 548d0180171SAndreas Gruenbacher drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", 549b30ab791SAndreas Gruenbacher sect_in, device->rs_in_flight, want, correction, 550b30ab791SAndreas Gruenbacher steps, cps, device->rs_planed, curr_corr, req_sect); 551778f271dSPhilipp Reisner */ 552778f271dSPhilipp Reisner 553778f271dSPhilipp Reisner return req_sect; 554778f271dSPhilipp Reisner } 555778f271dSPhilipp Reisner 556b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device) 557e65f440dSLars Ellenberg { 5580e49d7b0SLars Ellenberg unsigned int sect_in; /* Number of sectors that came in since the last turn */ 5590e49d7b0SLars Ellenberg int number, mxb; 5600e49d7b0SLars Ellenberg 5610e49d7b0SLars Ellenberg sect_in = atomic_xchg(&device->rs_sect_in, 0); 5620e49d7b0SLars Ellenberg device->rs_in_flight -= sect_in; 563813472ceSPhilipp Reisner 564813472ceSPhilipp Reisner rcu_read_lock(); 5650e49d7b0SLars Ellenberg mxb = drbd_get_max_buffers(device) / 2; 566b30ab791SAndreas Gruenbacher if (rcu_dereference(device->rs_plan_s)->size) { 5670e49d7b0SLars Ellenberg number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); 568b30ab791SAndreas Gruenbacher device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; 569e65f440dSLars Ellenberg } else { 570b30ab791SAndreas Gruenbacher device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; 571b30ab791SAndreas Gruenbacher number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); 572e65f440dSLars Ellenberg } 573813472ceSPhilipp Reisner rcu_read_unlock(); 574e65f440dSLars Ellenberg 5750e49d7b0SLars Ellenberg /* Don't have more than "max-buffers"/2 in-flight. 5760e49d7b0SLars Ellenberg * Otherwise we may cause the remote site to stall on drbd_alloc_pages(), 5770e49d7b0SLars Ellenberg * potentially causing a distributed deadlock on congestion during 5780e49d7b0SLars Ellenberg * online-verify or (checksum-based) resync, if max-buffers, 5790e49d7b0SLars Ellenberg * socket buffer sizes and resync rate settings are mis-configured. */ 5800e49d7b0SLars Ellenberg if (mxb - device->rs_in_flight < number) 5810e49d7b0SLars Ellenberg number = mxb - device->rs_in_flight; 5820e49d7b0SLars Ellenberg 583e65f440dSLars Ellenberg return number; 584e65f440dSLars Ellenberg } 585e65f440dSLars Ellenberg 58644a4d551SLars Ellenberg static int make_resync_request(struct drbd_device *const device, int cancel) 587b411b363SPhilipp Reisner { 58844a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 58944a4d551SLars Ellenberg struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; 590b411b363SPhilipp Reisner unsigned long bit; 591b411b363SPhilipp Reisner sector_t sector; 592b30ab791SAndreas Gruenbacher const sector_t capacity = drbd_get_capacity(device->this_bdev); 5931816a2b4SLars Ellenberg int max_bio_size; 594e65f440dSLars Ellenberg int number, rollback_i, size; 595b411b363SPhilipp Reisner int align, queued, sndbuf; 5960f0601f4SLars Ellenberg int i = 0; 597b411b363SPhilipp Reisner 598b411b363SPhilipp Reisner if (unlikely(cancel)) 59999920dc5SAndreas Gruenbacher return 0; 600b411b363SPhilipp Reisner 601b30ab791SAndreas Gruenbacher if (device->rs_total == 0) { 602af85e8e8SLars Ellenberg /* empty resync? */ 603b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 60499920dc5SAndreas Gruenbacher return 0; 605af85e8e8SLars Ellenberg } 606af85e8e8SLars Ellenberg 607b30ab791SAndreas Gruenbacher if (!get_ldev(device)) { 608b30ab791SAndreas Gruenbacher /* Since we only need to access device->rsync a 609b30ab791SAndreas Gruenbacher get_ldev_if_state(device,D_FAILED) would be sufficient, but 610b411b363SPhilipp Reisner to continue resync with a broken disk makes no sense at 611b411b363SPhilipp Reisner all */ 612d0180171SAndreas Gruenbacher drbd_err(device, "Disk broke down during resync!\n"); 61399920dc5SAndreas Gruenbacher return 0; 614b411b363SPhilipp Reisner } 615b411b363SPhilipp Reisner 616b30ab791SAndreas Gruenbacher max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; 617b30ab791SAndreas Gruenbacher number = drbd_rs_number_requests(device); 6180e49d7b0SLars Ellenberg if (number <= 0) 6190f0601f4SLars Ellenberg goto requeue; 620b411b363SPhilipp Reisner 621b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 622b411b363SPhilipp Reisner /* Stop generating RS requests, when half of the send buffer is filled */ 62344a4d551SLars Ellenberg mutex_lock(&connection->data.mutex); 62444a4d551SLars Ellenberg if (connection->data.socket) { 62544a4d551SLars Ellenberg queued = connection->data.socket->sk->sk_wmem_queued; 62644a4d551SLars Ellenberg sndbuf = connection->data.socket->sk->sk_sndbuf; 627b411b363SPhilipp Reisner } else { 628b411b363SPhilipp Reisner queued = 1; 629b411b363SPhilipp Reisner sndbuf = 0; 630b411b363SPhilipp Reisner } 63144a4d551SLars Ellenberg mutex_unlock(&connection->data.mutex); 632b411b363SPhilipp Reisner if (queued > sndbuf / 2) 633b411b363SPhilipp Reisner goto requeue; 634b411b363SPhilipp Reisner 635b411b363SPhilipp Reisner next_sector: 636b411b363SPhilipp Reisner size = BM_BLOCK_SIZE; 637b30ab791SAndreas Gruenbacher bit = drbd_bm_find_next(device, device->bm_resync_fo); 638b411b363SPhilipp Reisner 6394b0715f0SLars Ellenberg if (bit == DRBD_END_OF_BITMAP) { 640b30ab791SAndreas Gruenbacher device->bm_resync_fo = drbd_bm_bits(device); 641b30ab791SAndreas Gruenbacher put_ldev(device); 64299920dc5SAndreas Gruenbacher return 0; 643b411b363SPhilipp Reisner } 644b411b363SPhilipp Reisner 645b411b363SPhilipp Reisner sector = BM_BIT_TO_SECT(bit); 646b411b363SPhilipp Reisner 647b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector) || 648b30ab791SAndreas Gruenbacher drbd_try_rs_begin_io(device, sector)) { 649b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit; 650b411b363SPhilipp Reisner goto requeue; 651b411b363SPhilipp Reisner } 652b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit + 1; 653b411b363SPhilipp Reisner 654b30ab791SAndreas Gruenbacher if (unlikely(drbd_bm_test_bit(device, bit) == 0)) { 655b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 656b411b363SPhilipp Reisner goto next_sector; 657b411b363SPhilipp Reisner } 658b411b363SPhilipp Reisner 6591816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE 660b411b363SPhilipp Reisner /* try to find some adjacent bits. 661b411b363SPhilipp Reisner * we stop if we have already the maximum req size. 662b411b363SPhilipp Reisner * 663b411b363SPhilipp Reisner * Additionally always align bigger requests, in order to 664b411b363SPhilipp Reisner * be prepared for all stripe sizes of software RAIDs. 665b411b363SPhilipp Reisner */ 666b411b363SPhilipp Reisner align = 1; 667d207450cSPhilipp Reisner rollback_i = i; 6686377b923SLars Ellenberg while (i < number) { 6691816a2b4SLars Ellenberg if (size + BM_BLOCK_SIZE > max_bio_size) 670b411b363SPhilipp Reisner break; 671b411b363SPhilipp Reisner 672b411b363SPhilipp Reisner /* Be always aligned */ 673b411b363SPhilipp Reisner if (sector & ((1<<(align+3))-1)) 674b411b363SPhilipp Reisner break; 675b411b363SPhilipp Reisner 676b411b363SPhilipp Reisner /* do not cross extent boundaries */ 677b411b363SPhilipp Reisner if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) 678b411b363SPhilipp Reisner break; 679b411b363SPhilipp Reisner /* now, is it actually dirty, after all? 680b411b363SPhilipp Reisner * caution, drbd_bm_test_bit is tri-state for some 681b411b363SPhilipp Reisner * obscure reason; ( b == 0 ) would get the out-of-band 682b411b363SPhilipp Reisner * only accidentally right because of the "oddly sized" 683b411b363SPhilipp Reisner * adjustment below */ 684b30ab791SAndreas Gruenbacher if (drbd_bm_test_bit(device, bit+1) != 1) 685b411b363SPhilipp Reisner break; 686b411b363SPhilipp Reisner bit++; 687b411b363SPhilipp Reisner size += BM_BLOCK_SIZE; 688b411b363SPhilipp Reisner if ((BM_BLOCK_SIZE << align) <= size) 689b411b363SPhilipp Reisner align++; 690b411b363SPhilipp Reisner i++; 691b411b363SPhilipp Reisner } 692b411b363SPhilipp Reisner /* if we merged some, 693b411b363SPhilipp Reisner * reset the offset to start the next drbd_bm_find_next from */ 694b411b363SPhilipp Reisner if (size > BM_BLOCK_SIZE) 695b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit + 1; 696b411b363SPhilipp Reisner #endif 697b411b363SPhilipp Reisner 698b411b363SPhilipp Reisner /* adjust very last sectors, in case we are oddly sized */ 699b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 700b411b363SPhilipp Reisner size = (capacity-sector)<<9; 70144a4d551SLars Ellenberg if (connection->agreed_pro_version >= 89 && 70244a4d551SLars Ellenberg connection->csums_tfm) { 70344a4d551SLars Ellenberg switch (read_for_csum(peer_device, sector, size)) { 70480a40e43SLars Ellenberg case -EIO: /* Disk failure */ 705b30ab791SAndreas Gruenbacher put_ldev(device); 70699920dc5SAndreas Gruenbacher return -EIO; 70780a40e43SLars Ellenberg case -EAGAIN: /* allocation failed, or ldev busy */ 708b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 709b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 710d207450cSPhilipp Reisner i = rollback_i; 711b411b363SPhilipp Reisner goto requeue; 71280a40e43SLars Ellenberg case 0: 71380a40e43SLars Ellenberg /* everything ok */ 71480a40e43SLars Ellenberg break; 71580a40e43SLars Ellenberg default: 71680a40e43SLars Ellenberg BUG(); 717b411b363SPhilipp Reisner } 718b411b363SPhilipp Reisner } else { 71999920dc5SAndreas Gruenbacher int err; 72099920dc5SAndreas Gruenbacher 721b30ab791SAndreas Gruenbacher inc_rs_pending(device); 72244a4d551SLars Ellenberg err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST, 72399920dc5SAndreas Gruenbacher sector, size, ID_SYNCER); 72499920dc5SAndreas Gruenbacher if (err) { 725d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); 726b30ab791SAndreas Gruenbacher dec_rs_pending(device); 727b30ab791SAndreas Gruenbacher put_ldev(device); 72899920dc5SAndreas Gruenbacher return err; 729b411b363SPhilipp Reisner } 730b411b363SPhilipp Reisner } 731b411b363SPhilipp Reisner } 732b411b363SPhilipp Reisner 733b30ab791SAndreas Gruenbacher if (device->bm_resync_fo >= drbd_bm_bits(device)) { 734b411b363SPhilipp Reisner /* last syncer _request_ was sent, 735b411b363SPhilipp Reisner * but the P_RS_DATA_REPLY not yet received. sync will end (and 736b411b363SPhilipp Reisner * next sync group will resume), as soon as we receive the last 737b411b363SPhilipp Reisner * resync data block, and the last bit is cleared. 738b411b363SPhilipp Reisner * until then resync "work" is "inactive" ... 739b411b363SPhilipp Reisner */ 740b30ab791SAndreas Gruenbacher put_ldev(device); 74199920dc5SAndreas Gruenbacher return 0; 742b411b363SPhilipp Reisner } 743b411b363SPhilipp Reisner 744b411b363SPhilipp Reisner requeue: 745b30ab791SAndreas Gruenbacher device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 746b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 747b30ab791SAndreas Gruenbacher put_ldev(device); 74899920dc5SAndreas Gruenbacher return 0; 749b411b363SPhilipp Reisner } 750b411b363SPhilipp Reisner 751d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel) 752b411b363SPhilipp Reisner { 753b411b363SPhilipp Reisner int number, i, size; 754b411b363SPhilipp Reisner sector_t sector; 755b30ab791SAndreas Gruenbacher const sector_t capacity = drbd_get_capacity(device->this_bdev); 75658ffa580SLars Ellenberg bool stop_sector_reached = false; 757b411b363SPhilipp Reisner 758b411b363SPhilipp Reisner if (unlikely(cancel)) 759b411b363SPhilipp Reisner return 1; 760b411b363SPhilipp Reisner 761b30ab791SAndreas Gruenbacher number = drbd_rs_number_requests(device); 762b411b363SPhilipp Reisner 763b30ab791SAndreas Gruenbacher sector = device->ov_position; 764b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 76558ffa580SLars Ellenberg if (sector >= capacity) 766b411b363SPhilipp Reisner return 1; 76758ffa580SLars Ellenberg 76858ffa580SLars Ellenberg /* We check for "finished" only in the reply path: 76958ffa580SLars Ellenberg * w_e_end_ov_reply(). 77058ffa580SLars Ellenberg * We need to send at least one request out. */ 77158ffa580SLars Ellenberg stop_sector_reached = i > 0 772b30ab791SAndreas Gruenbacher && verify_can_do_stop_sector(device) 773b30ab791SAndreas Gruenbacher && sector >= device->ov_stop_sector; 77458ffa580SLars Ellenberg if (stop_sector_reached) 77558ffa580SLars Ellenberg break; 776b411b363SPhilipp Reisner 777b411b363SPhilipp Reisner size = BM_BLOCK_SIZE; 778b411b363SPhilipp Reisner 779b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector) || 780b30ab791SAndreas Gruenbacher drbd_try_rs_begin_io(device, sector)) { 781b30ab791SAndreas Gruenbacher device->ov_position = sector; 782b411b363SPhilipp Reisner goto requeue; 783b411b363SPhilipp Reisner } 784b411b363SPhilipp Reisner 785b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 786b411b363SPhilipp Reisner size = (capacity-sector)<<9; 787b411b363SPhilipp Reisner 788b30ab791SAndreas Gruenbacher inc_rs_pending(device); 78969a22773SAndreas Gruenbacher if (drbd_send_ov_request(first_peer_device(device), sector, size)) { 790b30ab791SAndreas Gruenbacher dec_rs_pending(device); 791b411b363SPhilipp Reisner return 0; 792b411b363SPhilipp Reisner } 793b411b363SPhilipp Reisner sector += BM_SECT_PER_BIT; 794b411b363SPhilipp Reisner } 795b30ab791SAndreas Gruenbacher device->ov_position = sector; 796b411b363SPhilipp Reisner 797b411b363SPhilipp Reisner requeue: 798b30ab791SAndreas Gruenbacher device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 79958ffa580SLars Ellenberg if (i == 0 || !stop_sector_reached) 800b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 801b411b363SPhilipp Reisner return 1; 802b411b363SPhilipp Reisner } 803b411b363SPhilipp Reisner 80499920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel) 805b411b363SPhilipp Reisner { 80684b8c06bSAndreas Gruenbacher struct drbd_device_work *dw = 80784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device_work, w); 80884b8c06bSAndreas Gruenbacher struct drbd_device *device = dw->device; 80984b8c06bSAndreas Gruenbacher kfree(dw); 810b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 811b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 812b411b363SPhilipp Reisner 81399920dc5SAndreas Gruenbacher return 0; 814b411b363SPhilipp Reisner } 815b411b363SPhilipp Reisner 81699920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel) 817b411b363SPhilipp Reisner { 81884b8c06bSAndreas Gruenbacher struct drbd_device_work *dw = 81984b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device_work, w); 82084b8c06bSAndreas Gruenbacher struct drbd_device *device = dw->device; 82184b8c06bSAndreas Gruenbacher kfree(dw); 822b411b363SPhilipp Reisner 823b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 824b411b363SPhilipp Reisner 82599920dc5SAndreas Gruenbacher return 0; 826b411b363SPhilipp Reisner } 827b411b363SPhilipp Reisner 828b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device) 829af85e8e8SLars Ellenberg { 830a6b32bc3SAndreas Gruenbacher struct drbd_connection *connection = first_peer_device(device)->connection; 8312a67d8b9SPhilipp Reisner 832bde89a9eSAndreas Gruenbacher clear_bit(GOT_PING_ACK, &connection->flags); 833bde89a9eSAndreas Gruenbacher request_ping(connection); 834bde89a9eSAndreas Gruenbacher wait_event(connection->ping_wait, 835bde89a9eSAndreas Gruenbacher test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); 836af85e8e8SLars Ellenberg } 837af85e8e8SLars Ellenberg 838b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device) 839b411b363SPhilipp Reisner { 840b411b363SPhilipp Reisner unsigned long db, dt, dbdt; 841b411b363SPhilipp Reisner unsigned long n_oos; 842b411b363SPhilipp Reisner union drbd_state os, ns; 84384b8c06bSAndreas Gruenbacher struct drbd_device_work *dw; 844b411b363SPhilipp Reisner char *khelper_cmd = NULL; 84526525618SLars Ellenberg int verify_done = 0; 846b411b363SPhilipp Reisner 847b411b363SPhilipp Reisner /* Remove all elements from the resync LRU. Since future actions 848b411b363SPhilipp Reisner * might set bits in the (main) bitmap, then the entries in the 849b411b363SPhilipp Reisner * resync LRU would be wrong. */ 850b30ab791SAndreas Gruenbacher if (drbd_rs_del_all(device)) { 851b411b363SPhilipp Reisner /* In case this is not possible now, most probably because 852b411b363SPhilipp Reisner * there are P_RS_DATA_REPLY Packets lingering on the worker's 853b411b363SPhilipp Reisner * queue (or even the read operations for those packets 854b411b363SPhilipp Reisner * is not finished by now). Retry in 100ms. */ 855b411b363SPhilipp Reisner 85620ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ / 10); 85784b8c06bSAndreas Gruenbacher dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC); 85884b8c06bSAndreas Gruenbacher if (dw) { 85984b8c06bSAndreas Gruenbacher dw->w.cb = w_resync_finished; 86084b8c06bSAndreas Gruenbacher dw->device = device; 86184b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 86284b8c06bSAndreas Gruenbacher &dw->w); 863b411b363SPhilipp Reisner return 1; 864b411b363SPhilipp Reisner } 86584b8c06bSAndreas Gruenbacher drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); 866b411b363SPhilipp Reisner } 867b411b363SPhilipp Reisner 868b30ab791SAndreas Gruenbacher dt = (jiffies - device->rs_start - device->rs_paused) / HZ; 869b411b363SPhilipp Reisner if (dt <= 0) 870b411b363SPhilipp Reisner dt = 1; 87158ffa580SLars Ellenberg 872b30ab791SAndreas Gruenbacher db = device->rs_total; 87358ffa580SLars Ellenberg /* adjust for verify start and stop sectors, respective reached position */ 874b30ab791SAndreas Gruenbacher if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 875b30ab791SAndreas Gruenbacher db -= device->ov_left; 87658ffa580SLars Ellenberg 877b411b363SPhilipp Reisner dbdt = Bit2KB(db/dt); 878b30ab791SAndreas Gruenbacher device->rs_paused /= HZ; 879b411b363SPhilipp Reisner 880b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 881b411b363SPhilipp Reisner goto out; 882b411b363SPhilipp Reisner 883b30ab791SAndreas Gruenbacher ping_peer(device); 884af85e8e8SLars Ellenberg 8850500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 886b30ab791SAndreas Gruenbacher os = drbd_read_state(device); 887b411b363SPhilipp Reisner 88826525618SLars Ellenberg verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); 88926525618SLars Ellenberg 890b411b363SPhilipp Reisner /* This protects us against multiple calls (that can happen in the presence 891b411b363SPhilipp Reisner of application IO), and against connectivity loss just before we arrive here. */ 892b411b363SPhilipp Reisner if (os.conn <= C_CONNECTED) 893b411b363SPhilipp Reisner goto out_unlock; 894b411b363SPhilipp Reisner 895b411b363SPhilipp Reisner ns = os; 896b411b363SPhilipp Reisner ns.conn = C_CONNECTED; 897b411b363SPhilipp Reisner 898d0180171SAndreas Gruenbacher drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 89926525618SLars Ellenberg verify_done ? "Online verify" : "Resync", 900b30ab791SAndreas Gruenbacher dt + device->rs_paused, device->rs_paused, dbdt); 901b411b363SPhilipp Reisner 902b30ab791SAndreas Gruenbacher n_oos = drbd_bm_total_weight(device); 903b411b363SPhilipp Reisner 904b411b363SPhilipp Reisner if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { 905b411b363SPhilipp Reisner if (n_oos) { 906d0180171SAndreas Gruenbacher drbd_alert(device, "Online verify found %lu %dk block out of sync!\n", 907b411b363SPhilipp Reisner n_oos, Bit2KB(1)); 908b411b363SPhilipp Reisner khelper_cmd = "out-of-sync"; 909b411b363SPhilipp Reisner } 910b411b363SPhilipp Reisner } else { 9110b0ba1efSAndreas Gruenbacher D_ASSERT(device, (n_oos - device->rs_failed) == 0); 912b411b363SPhilipp Reisner 913b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) 914b411b363SPhilipp Reisner khelper_cmd = "after-resync-target"; 915b411b363SPhilipp Reisner 916a6b32bc3SAndreas Gruenbacher if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { 917b30ab791SAndreas Gruenbacher const unsigned long s = device->rs_same_csum; 918b30ab791SAndreas Gruenbacher const unsigned long t = device->rs_total; 919b411b363SPhilipp Reisner const int ratio = 920b411b363SPhilipp Reisner (t == 0) ? 0 : 921b411b363SPhilipp Reisner (t < 100000) ? ((s*100)/t) : (s/(t/100)); 922d0180171SAndreas Gruenbacher drbd_info(device, "%u %% had equal checksums, eliminated: %luK; " 923b411b363SPhilipp Reisner "transferred %luK total %luK\n", 924b411b363SPhilipp Reisner ratio, 925b30ab791SAndreas Gruenbacher Bit2KB(device->rs_same_csum), 926b30ab791SAndreas Gruenbacher Bit2KB(device->rs_total - device->rs_same_csum), 927b30ab791SAndreas Gruenbacher Bit2KB(device->rs_total)); 928b411b363SPhilipp Reisner } 929b411b363SPhilipp Reisner } 930b411b363SPhilipp Reisner 931b30ab791SAndreas Gruenbacher if (device->rs_failed) { 932d0180171SAndreas Gruenbacher drbd_info(device, " %lu failed blocks\n", device->rs_failed); 933b411b363SPhilipp Reisner 934b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 935b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 936b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 937b411b363SPhilipp Reisner } else { 938b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 939b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 940b411b363SPhilipp Reisner } 941b411b363SPhilipp Reisner } else { 942b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 943b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 944b411b363SPhilipp Reisner 945b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 946b30ab791SAndreas Gruenbacher if (device->p_uuid) { 947b411b363SPhilipp Reisner int i; 948b411b363SPhilipp Reisner for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) 949b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, i, device->p_uuid[i]); 950b30ab791SAndreas Gruenbacher drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]); 951b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]); 952b411b363SPhilipp Reisner } else { 953d0180171SAndreas Gruenbacher drbd_err(device, "device->p_uuid is NULL! BUG\n"); 954b411b363SPhilipp Reisner } 955b411b363SPhilipp Reisner } 956b411b363SPhilipp Reisner 95762b0da3aSLars Ellenberg if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { 95862b0da3aSLars Ellenberg /* for verify runs, we don't update uuids here, 95962b0da3aSLars Ellenberg * so there would be nothing to report. */ 960b30ab791SAndreas Gruenbacher drbd_uuid_set_bm(device, 0UL); 961b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "updated UUIDs"); 962b30ab791SAndreas Gruenbacher if (device->p_uuid) { 963b411b363SPhilipp Reisner /* Now the two UUID sets are equal, update what we 964b411b363SPhilipp Reisner * know of the peer. */ 965b411b363SPhilipp Reisner int i; 966b411b363SPhilipp Reisner for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) 967b30ab791SAndreas Gruenbacher device->p_uuid[i] = device->ldev->md.uuid[i]; 968b411b363SPhilipp Reisner } 969b411b363SPhilipp Reisner } 97062b0da3aSLars Ellenberg } 971b411b363SPhilipp Reisner 972b30ab791SAndreas Gruenbacher _drbd_set_state(device, ns, CS_VERBOSE, NULL); 973b411b363SPhilipp Reisner out_unlock: 9740500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 975b30ab791SAndreas Gruenbacher put_ldev(device); 976b411b363SPhilipp Reisner out: 977b30ab791SAndreas Gruenbacher device->rs_total = 0; 978b30ab791SAndreas Gruenbacher device->rs_failed = 0; 979b30ab791SAndreas Gruenbacher device->rs_paused = 0; 98058ffa580SLars Ellenberg 98158ffa580SLars Ellenberg /* reset start sector, if we reached end of device */ 982b30ab791SAndreas Gruenbacher if (verify_done && device->ov_left == 0) 983b30ab791SAndreas Gruenbacher device->ov_start_sector = 0; 984b411b363SPhilipp Reisner 985b30ab791SAndreas Gruenbacher drbd_md_sync(device); 98613d42685SLars Ellenberg 987b411b363SPhilipp Reisner if (khelper_cmd) 988b30ab791SAndreas Gruenbacher drbd_khelper(device, khelper_cmd); 989b411b363SPhilipp Reisner 990b411b363SPhilipp Reisner return 1; 991b411b363SPhilipp Reisner } 992b411b363SPhilipp Reisner 993b411b363SPhilipp Reisner /* helper */ 994b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) 995b411b363SPhilipp Reisner { 996045417f7SAndreas Gruenbacher if (drbd_peer_req_has_active_page(peer_req)) { 997b411b363SPhilipp Reisner /* This might happen if sendpage() has not finished */ 998db830c46SAndreas Gruenbacher int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; 999b30ab791SAndreas Gruenbacher atomic_add(i, &device->pp_in_use_by_net); 1000b30ab791SAndreas Gruenbacher atomic_sub(i, &device->pp_in_use); 10010500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1002a8cd15baSAndreas Gruenbacher list_add_tail(&peer_req->w.list, &device->net_ee); 10030500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1004435f0740SLars Ellenberg wake_up(&drbd_pp_wait); 1005b411b363SPhilipp Reisner } else 1006b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1007b411b363SPhilipp Reisner } 1008b411b363SPhilipp Reisner 1009b411b363SPhilipp Reisner /** 1010b411b363SPhilipp Reisner * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST 1011b30ab791SAndreas Gruenbacher * @device: DRBD device. 1012b411b363SPhilipp Reisner * @w: work object. 1013b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1014b411b363SPhilipp Reisner */ 101599920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel) 1016b411b363SPhilipp Reisner { 1017a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10186780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 10196780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 102099920dc5SAndreas Gruenbacher int err; 1021b411b363SPhilipp Reisner 1022b411b363SPhilipp Reisner if (unlikely(cancel)) { 1023b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1024b30ab791SAndreas Gruenbacher dec_unacked(device); 102599920dc5SAndreas Gruenbacher return 0; 1026b411b363SPhilipp Reisner } 1027b411b363SPhilipp Reisner 1028db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 10296780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); 1030b411b363SPhilipp Reisner } else { 1031b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1032d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. sector=%llus.\n", 1033db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 1034b411b363SPhilipp Reisner 10356780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); 1036b411b363SPhilipp Reisner } 1037b411b363SPhilipp Reisner 1038b30ab791SAndreas Gruenbacher dec_unacked(device); 1039b411b363SPhilipp Reisner 1040b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1041b411b363SPhilipp Reisner 104299920dc5SAndreas Gruenbacher if (unlikely(err)) 1043d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block() failed\n"); 104499920dc5SAndreas Gruenbacher return err; 1045b411b363SPhilipp Reisner } 1046b411b363SPhilipp Reisner 1047b411b363SPhilipp Reisner /** 1048a209b4aeSAndreas Gruenbacher * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST 1049b411b363SPhilipp Reisner * @w: work object. 1050b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1051b411b363SPhilipp Reisner */ 105299920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel) 1053b411b363SPhilipp Reisner { 1054a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10556780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 10566780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 105799920dc5SAndreas Gruenbacher int err; 1058b411b363SPhilipp Reisner 1059b411b363SPhilipp Reisner if (unlikely(cancel)) { 1060b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1061b30ab791SAndreas Gruenbacher dec_unacked(device); 106299920dc5SAndreas Gruenbacher return 0; 1063b411b363SPhilipp Reisner } 1064b411b363SPhilipp Reisner 1065b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_FAILED)) { 1066b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1067b30ab791SAndreas Gruenbacher put_ldev(device); 1068b411b363SPhilipp Reisner } 1069b411b363SPhilipp Reisner 1070b30ab791SAndreas Gruenbacher if (device->state.conn == C_AHEAD) { 10716780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); 1072db830c46SAndreas Gruenbacher } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1073b30ab791SAndreas Gruenbacher if (likely(device->state.pdsk >= D_INCONSISTENT)) { 1074b30ab791SAndreas Gruenbacher inc_rs_pending(device); 10756780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 1076b411b363SPhilipp Reisner } else { 1077b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1078d0180171SAndreas Gruenbacher drbd_err(device, "Not sending RSDataReply, " 1079b411b363SPhilipp Reisner "partner DISKLESS!\n"); 108099920dc5SAndreas Gruenbacher err = 0; 1081b411b363SPhilipp Reisner } 1082b411b363SPhilipp Reisner } else { 1083b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1084d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegRSDReply. sector %llus.\n", 1085db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 1086b411b363SPhilipp Reisner 10876780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 1088b411b363SPhilipp Reisner 1089b411b363SPhilipp Reisner /* update resync data with failure */ 1090b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); 1091b411b363SPhilipp Reisner } 1092b411b363SPhilipp Reisner 1093b30ab791SAndreas Gruenbacher dec_unacked(device); 1094b411b363SPhilipp Reisner 1095b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1096b411b363SPhilipp Reisner 109799920dc5SAndreas Gruenbacher if (unlikely(err)) 1098d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block() failed\n"); 109999920dc5SAndreas Gruenbacher return err; 1100b411b363SPhilipp Reisner } 1101b411b363SPhilipp Reisner 110299920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) 1103b411b363SPhilipp Reisner { 1104a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11056780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 11066780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1107b411b363SPhilipp Reisner struct digest_info *di; 1108b411b363SPhilipp Reisner int digest_size; 1109b411b363SPhilipp Reisner void *digest = NULL; 111099920dc5SAndreas Gruenbacher int err, eq = 0; 1111b411b363SPhilipp Reisner 1112b411b363SPhilipp Reisner if (unlikely(cancel)) { 1113b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1114b30ab791SAndreas Gruenbacher dec_unacked(device); 111599920dc5SAndreas Gruenbacher return 0; 1116b411b363SPhilipp Reisner } 1117b411b363SPhilipp Reisner 1118b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1119b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1120b30ab791SAndreas Gruenbacher put_ldev(device); 11211d53f09eSLars Ellenberg } 1122b411b363SPhilipp Reisner 1123db830c46SAndreas Gruenbacher di = peer_req->digest; 1124b411b363SPhilipp Reisner 1125db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1126b411b363SPhilipp Reisner /* quick hack to try to avoid a race against reconfiguration. 1127b411b363SPhilipp Reisner * a real fix would be much more involved, 1128b411b363SPhilipp Reisner * introducing more locking mechanisms */ 11296780139cSAndreas Gruenbacher if (peer_device->connection->csums_tfm) { 11306780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); 11310b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1132b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1133b411b363SPhilipp Reisner } 1134b411b363SPhilipp Reisner if (digest) { 11356780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 1136b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1137b411b363SPhilipp Reisner kfree(digest); 1138b411b363SPhilipp Reisner } 1139b411b363SPhilipp Reisner 1140b411b363SPhilipp Reisner if (eq) { 1141b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); 1142676396d5SLars Ellenberg /* rs_same_csums unit is BM_BLOCK_SIZE */ 1143b30ab791SAndreas Gruenbacher device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; 11446780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); 1145b411b363SPhilipp Reisner } else { 1146b30ab791SAndreas Gruenbacher inc_rs_pending(device); 1147db830c46SAndreas Gruenbacher peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ 1148db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ 1149204bba99SPhilipp Reisner kfree(di); 11506780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 1151b411b363SPhilipp Reisner } 1152b411b363SPhilipp Reisner } else { 11536780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 1154b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1155d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); 1156b411b363SPhilipp Reisner } 1157b411b363SPhilipp Reisner 1158b30ab791SAndreas Gruenbacher dec_unacked(device); 1159b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1160b411b363SPhilipp Reisner 116199920dc5SAndreas Gruenbacher if (unlikely(err)) 1162d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block/ack() failed\n"); 116399920dc5SAndreas Gruenbacher return err; 1164b411b363SPhilipp Reisner } 1165b411b363SPhilipp Reisner 116699920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel) 1167b411b363SPhilipp Reisner { 1168a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11696780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 11706780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1171db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 1172db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 1173b411b363SPhilipp Reisner int digest_size; 1174b411b363SPhilipp Reisner void *digest; 117599920dc5SAndreas Gruenbacher int err = 0; 1176b411b363SPhilipp Reisner 1177b411b363SPhilipp Reisner if (unlikely(cancel)) 1178b411b363SPhilipp Reisner goto out; 1179b411b363SPhilipp Reisner 11806780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); 1181b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 11828f21420eSPhilipp Reisner if (!digest) { 118399920dc5SAndreas Gruenbacher err = 1; /* terminate the connection in case the allocation failed */ 11848f21420eSPhilipp Reisner goto out; 11858f21420eSPhilipp Reisner } 11868f21420eSPhilipp Reisner 1187db830c46SAndreas Gruenbacher if (likely(!(peer_req->flags & EE_WAS_ERROR))) 11886780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 11898f21420eSPhilipp Reisner else 11908f21420eSPhilipp Reisner memset(digest, 0, digest_size); 11918f21420eSPhilipp Reisner 119253ea4331SLars Ellenberg /* Free e and pages before send. 119353ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 119453ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 119553ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1196c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 1197b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1198db830c46SAndreas Gruenbacher peer_req = NULL; 1199b30ab791SAndreas Gruenbacher inc_rs_pending(device); 12006780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); 120199920dc5SAndreas Gruenbacher if (err) 1202b30ab791SAndreas Gruenbacher dec_rs_pending(device); 1203b411b363SPhilipp Reisner kfree(digest); 1204b411b363SPhilipp Reisner 1205b411b363SPhilipp Reisner out: 1206db830c46SAndreas Gruenbacher if (peer_req) 1207b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1208b30ab791SAndreas Gruenbacher dec_unacked(device); 120999920dc5SAndreas Gruenbacher return err; 1210b411b363SPhilipp Reisner } 1211b411b363SPhilipp Reisner 1212b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) 1213b411b363SPhilipp Reisner { 1214b30ab791SAndreas Gruenbacher if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { 1215b30ab791SAndreas Gruenbacher device->ov_last_oos_size += size>>9; 1216b411b363SPhilipp Reisner } else { 1217b30ab791SAndreas Gruenbacher device->ov_last_oos_start = sector; 1218b30ab791SAndreas Gruenbacher device->ov_last_oos_size = size>>9; 1219b411b363SPhilipp Reisner } 1220b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 1221b411b363SPhilipp Reisner } 1222b411b363SPhilipp Reisner 122399920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel) 1224b411b363SPhilipp Reisner { 1225a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 12266780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 12276780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1228b411b363SPhilipp Reisner struct digest_info *di; 1229b411b363SPhilipp Reisner void *digest; 1230db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 1231db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 123253ea4331SLars Ellenberg int digest_size; 123399920dc5SAndreas Gruenbacher int err, eq = 0; 123458ffa580SLars Ellenberg bool stop_sector_reached = false; 1235b411b363SPhilipp Reisner 1236b411b363SPhilipp Reisner if (unlikely(cancel)) { 1237b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1238b30ab791SAndreas Gruenbacher dec_unacked(device); 123999920dc5SAndreas Gruenbacher return 0; 1240b411b363SPhilipp Reisner } 1241b411b363SPhilipp Reisner 1242b411b363SPhilipp Reisner /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 1243b411b363SPhilipp Reisner * the resync lru has been cleaned up already */ 1244b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1245b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1246b30ab791SAndreas Gruenbacher put_ldev(device); 12471d53f09eSLars Ellenberg } 1248b411b363SPhilipp Reisner 1249db830c46SAndreas Gruenbacher di = peer_req->digest; 1250b411b363SPhilipp Reisner 1251db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 12526780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); 1253b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1254b411b363SPhilipp Reisner if (digest) { 12556780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 1256b411b363SPhilipp Reisner 12570b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1258b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1259b411b363SPhilipp Reisner kfree(digest); 1260b411b363SPhilipp Reisner } 1261b411b363SPhilipp Reisner } 1262b411b363SPhilipp Reisner 12639676c760SLars Ellenberg /* Free peer_req and pages before send. 126453ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 126553ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 126653ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1267c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 1268b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1269b411b363SPhilipp Reisner if (!eq) 1270b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 1271b411b363SPhilipp Reisner else 1272b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1273b411b363SPhilipp Reisner 12746780139cSAndreas Gruenbacher err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, 1275b411b363SPhilipp Reisner eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); 1276b411b363SPhilipp Reisner 1277b30ab791SAndreas Gruenbacher dec_unacked(device); 1278b411b363SPhilipp Reisner 1279b30ab791SAndreas Gruenbacher --device->ov_left; 1280ea5442afSLars Ellenberg 1281ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 1282b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 1283b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 1284ea5442afSLars Ellenberg 1285b30ab791SAndreas Gruenbacher stop_sector_reached = verify_can_do_stop_sector(device) && 1286b30ab791SAndreas Gruenbacher (sector + (size>>9)) >= device->ov_stop_sector; 128758ffa580SLars Ellenberg 1288b30ab791SAndreas Gruenbacher if (device->ov_left == 0 || stop_sector_reached) { 1289b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1290b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1291b411b363SPhilipp Reisner } 1292b411b363SPhilipp Reisner 129399920dc5SAndreas Gruenbacher return err; 1294b411b363SPhilipp Reisner } 1295b411b363SPhilipp Reisner 1296b6dd1a89SLars Ellenberg /* FIXME 1297b6dd1a89SLars Ellenberg * We need to track the number of pending barrier acks, 1298b6dd1a89SLars Ellenberg * and to be able to wait for them. 1299b6dd1a89SLars Ellenberg * See also comment in drbd_adm_attach before drbd_suspend_io. 1300b6dd1a89SLars Ellenberg */ 1301bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection) 1302b411b363SPhilipp Reisner { 13039f5bdc33SAndreas Gruenbacher struct p_barrier *p; 1304b6dd1a89SLars Ellenberg struct drbd_socket *sock; 1305b411b363SPhilipp Reisner 1306bde89a9eSAndreas Gruenbacher sock = &connection->data; 1307bde89a9eSAndreas Gruenbacher p = conn_prepare_command(connection, sock); 13089f5bdc33SAndreas Gruenbacher if (!p) 13099f5bdc33SAndreas Gruenbacher return -EIO; 1310bde89a9eSAndreas Gruenbacher p->barrier = connection->send.current_epoch_nr; 1311b6dd1a89SLars Ellenberg p->pad = 0; 1312bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 1313b6dd1a89SLars Ellenberg 1314bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); 1315b411b363SPhilipp Reisner } 1316b411b363SPhilipp Reisner 131799920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel) 1318b411b363SPhilipp Reisner { 131984b8c06bSAndreas Gruenbacher struct drbd_device *device = 132084b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, unplug_work); 13219f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 13229f5bdc33SAndreas Gruenbacher 1323b411b363SPhilipp Reisner if (cancel) 132499920dc5SAndreas Gruenbacher return 0; 1325a6b32bc3SAndreas Gruenbacher sock = &first_peer_device(device)->connection->data; 132669a22773SAndreas Gruenbacher if (!drbd_prepare_command(first_peer_device(device), sock)) 13279f5bdc33SAndreas Gruenbacher return -EIO; 132869a22773SAndreas Gruenbacher return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0); 1329b411b363SPhilipp Reisner } 1330b411b363SPhilipp Reisner 1331bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch) 13324eb9b3cbSLars Ellenberg { 1333bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) { 1334bde89a9eSAndreas Gruenbacher connection->send.seen_any_write_yet = true; 1335bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 1336bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 13374eb9b3cbSLars Ellenberg } 13384eb9b3cbSLars Ellenberg } 13394eb9b3cbSLars Ellenberg 1340bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) 13414eb9b3cbSLars Ellenberg { 13424eb9b3cbSLars Ellenberg /* re-init if first write on this connection */ 1343bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) 13444eb9b3cbSLars Ellenberg return; 1345bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_nr != epoch) { 1346bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_writes) 1347bde89a9eSAndreas Gruenbacher drbd_send_barrier(connection); 1348bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 13494eb9b3cbSLars Ellenberg } 13504eb9b3cbSLars Ellenberg } 13514eb9b3cbSLars Ellenberg 13528f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel) 135373a01a18SPhilipp Reisner { 135473a01a18SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 135584b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 135644a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 135744a4d551SLars Ellenberg struct drbd_connection *const connection = peer_device->connection; 135899920dc5SAndreas Gruenbacher int err; 135973a01a18SPhilipp Reisner 136073a01a18SPhilipp Reisner if (unlikely(cancel)) { 13618554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 136299920dc5SAndreas Gruenbacher return 0; 136373a01a18SPhilipp Reisner } 136473a01a18SPhilipp Reisner 1365bde89a9eSAndreas Gruenbacher /* this time, no connection->send.current_epoch_writes++; 1366b6dd1a89SLars Ellenberg * If it was sent, it was the closing barrier for the last 1367b6dd1a89SLars Ellenberg * replicated epoch, before we went into AHEAD mode. 1368b6dd1a89SLars Ellenberg * No more barriers will be sent, until we leave AHEAD mode again. */ 1369bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1370b6dd1a89SLars Ellenberg 137144a4d551SLars Ellenberg err = drbd_send_out_of_sync(peer_device, req); 13728554df1cSAndreas Gruenbacher req_mod(req, OOS_HANDED_TO_NETWORK); 137373a01a18SPhilipp Reisner 137499920dc5SAndreas Gruenbacher return err; 137573a01a18SPhilipp Reisner } 137673a01a18SPhilipp Reisner 1377b411b363SPhilipp Reisner /** 1378b411b363SPhilipp Reisner * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 1379b411b363SPhilipp Reisner * @w: work object. 1380b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1381b411b363SPhilipp Reisner */ 138299920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel) 1383b411b363SPhilipp Reisner { 1384b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 138584b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 138644a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 138744a4d551SLars Ellenberg struct drbd_connection *connection = peer_device->connection; 138899920dc5SAndreas Gruenbacher int err; 1389b411b363SPhilipp Reisner 1390b411b363SPhilipp Reisner if (unlikely(cancel)) { 13918554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 139299920dc5SAndreas Gruenbacher return 0; 1393b411b363SPhilipp Reisner } 1394b411b363SPhilipp Reisner 1395bde89a9eSAndreas Gruenbacher re_init_if_first_write(connection, req->epoch); 1396bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1397bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes++; 1398b6dd1a89SLars Ellenberg 139944a4d551SLars Ellenberg err = drbd_send_dblock(peer_device, req); 140099920dc5SAndreas Gruenbacher req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1401b411b363SPhilipp Reisner 140299920dc5SAndreas Gruenbacher return err; 1403b411b363SPhilipp Reisner } 1404b411b363SPhilipp Reisner 1405b411b363SPhilipp Reisner /** 1406b411b363SPhilipp Reisner * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet 1407b411b363SPhilipp Reisner * @w: work object. 1408b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1409b411b363SPhilipp Reisner */ 141099920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel) 1411b411b363SPhilipp Reisner { 1412b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 141384b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 141444a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 141544a4d551SLars Ellenberg struct drbd_connection *connection = peer_device->connection; 141699920dc5SAndreas Gruenbacher int err; 1417b411b363SPhilipp Reisner 1418b411b363SPhilipp Reisner if (unlikely(cancel)) { 14198554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 142099920dc5SAndreas Gruenbacher return 0; 1421b411b363SPhilipp Reisner } 1422b411b363SPhilipp Reisner 1423b6dd1a89SLars Ellenberg /* Even read requests may close a write epoch, 1424b6dd1a89SLars Ellenberg * if there was any yet. */ 1425bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1426b6dd1a89SLars Ellenberg 142744a4d551SLars Ellenberg err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, 1428b411b363SPhilipp Reisner (unsigned long)req); 1429b411b363SPhilipp Reisner 143099920dc5SAndreas Gruenbacher req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1431b411b363SPhilipp Reisner 143299920dc5SAndreas Gruenbacher return err; 1433b411b363SPhilipp Reisner } 1434b411b363SPhilipp Reisner 143599920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel) 1436265be2d0SPhilipp Reisner { 1437265be2d0SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 143884b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1439265be2d0SPhilipp Reisner 14400778286aSPhilipp Reisner if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 1441b30ab791SAndreas Gruenbacher drbd_al_begin_io(device, &req->i, false); 1442265be2d0SPhilipp Reisner 1443265be2d0SPhilipp Reisner drbd_req_make_private_bio(req, req->master_bio); 1444b30ab791SAndreas Gruenbacher req->private_bio->bi_bdev = device->ldev->backing_bdev; 1445265be2d0SPhilipp Reisner generic_make_request(req->private_bio); 1446265be2d0SPhilipp Reisner 144799920dc5SAndreas Gruenbacher return 0; 1448265be2d0SPhilipp Reisner } 1449265be2d0SPhilipp Reisner 1450b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device) 1451b411b363SPhilipp Reisner { 1452b30ab791SAndreas Gruenbacher struct drbd_device *odev = device; 145395f8efd0SAndreas Gruenbacher int resync_after; 1454b411b363SPhilipp Reisner 1455b411b363SPhilipp Reisner while (1) { 1456a3f8f7dcSLars Ellenberg if (!odev->ldev || odev->state.disk == D_DISKLESS) 1457438c8374SPhilipp Reisner return 1; 1458daeda1ccSPhilipp Reisner rcu_read_lock(); 145995f8efd0SAndreas Gruenbacher resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 1460daeda1ccSPhilipp Reisner rcu_read_unlock(); 146195f8efd0SAndreas Gruenbacher if (resync_after == -1) 1462b411b363SPhilipp Reisner return 1; 1463b30ab791SAndreas Gruenbacher odev = minor_to_device(resync_after); 1464a3f8f7dcSLars Ellenberg if (!odev) 1465841ce241SAndreas Gruenbacher return 1; 1466b411b363SPhilipp Reisner if ((odev->state.conn >= C_SYNC_SOURCE && 1467b411b363SPhilipp Reisner odev->state.conn <= C_PAUSED_SYNC_T) || 1468b411b363SPhilipp Reisner odev->state.aftr_isp || odev->state.peer_isp || 1469b411b363SPhilipp Reisner odev->state.user_isp) 1470b411b363SPhilipp Reisner return 0; 1471b411b363SPhilipp Reisner } 1472b411b363SPhilipp Reisner } 1473b411b363SPhilipp Reisner 1474b411b363SPhilipp Reisner /** 1475b411b363SPhilipp Reisner * _drbd_pause_after() - Pause resync on all devices that may not resync now 1476b30ab791SAndreas Gruenbacher * @device: DRBD device. 1477b411b363SPhilipp Reisner * 1478b411b363SPhilipp Reisner * Called from process context only (admin command and after_state_ch). 1479b411b363SPhilipp Reisner */ 1480b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device) 1481b411b363SPhilipp Reisner { 148254761697SAndreas Gruenbacher struct drbd_device *odev; 1483b411b363SPhilipp Reisner int i, rv = 0; 1484b411b363SPhilipp Reisner 1485695d08faSPhilipp Reisner rcu_read_lock(); 148605a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1487b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1488b411b363SPhilipp Reisner continue; 1489b411b363SPhilipp Reisner if (!_drbd_may_sync_now(odev)) 1490b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) 1491b411b363SPhilipp Reisner != SS_NOTHING_TO_DO); 1492b411b363SPhilipp Reisner } 1493695d08faSPhilipp Reisner rcu_read_unlock(); 1494b411b363SPhilipp Reisner 1495b411b363SPhilipp Reisner return rv; 1496b411b363SPhilipp Reisner } 1497b411b363SPhilipp Reisner 1498b411b363SPhilipp Reisner /** 1499b411b363SPhilipp Reisner * _drbd_resume_next() - Resume resync on all devices that may resync now 1500b30ab791SAndreas Gruenbacher * @device: DRBD device. 1501b411b363SPhilipp Reisner * 1502b411b363SPhilipp Reisner * Called from process context only (admin command and worker). 1503b411b363SPhilipp Reisner */ 1504b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device) 1505b411b363SPhilipp Reisner { 150654761697SAndreas Gruenbacher struct drbd_device *odev; 1507b411b363SPhilipp Reisner int i, rv = 0; 1508b411b363SPhilipp Reisner 1509695d08faSPhilipp Reisner rcu_read_lock(); 151005a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1511b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1512b411b363SPhilipp Reisner continue; 1513b411b363SPhilipp Reisner if (odev->state.aftr_isp) { 1514b411b363SPhilipp Reisner if (_drbd_may_sync_now(odev)) 1515b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), 1516b411b363SPhilipp Reisner CS_HARD, NULL) 1517b411b363SPhilipp Reisner != SS_NOTHING_TO_DO) ; 1518b411b363SPhilipp Reisner } 1519b411b363SPhilipp Reisner } 1520695d08faSPhilipp Reisner rcu_read_unlock(); 1521b411b363SPhilipp Reisner return rv; 1522b411b363SPhilipp Reisner } 1523b411b363SPhilipp Reisner 1524b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device) 1525b411b363SPhilipp Reisner { 1526b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1527b30ab791SAndreas Gruenbacher _drbd_resume_next(device); 1528b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1529b411b363SPhilipp Reisner } 1530b411b363SPhilipp Reisner 1531b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device) 1532b411b363SPhilipp Reisner { 1533b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1534b30ab791SAndreas Gruenbacher _drbd_pause_after(device); 1535b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1536b411b363SPhilipp Reisner } 1537b411b363SPhilipp Reisner 1538dc97b708SPhilipp Reisner /* caller must hold global_state_lock */ 1539b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) 1540b411b363SPhilipp Reisner { 154154761697SAndreas Gruenbacher struct drbd_device *odev; 154295f8efd0SAndreas Gruenbacher int resync_after; 1543b411b363SPhilipp Reisner 1544b411b363SPhilipp Reisner if (o_minor == -1) 1545b411b363SPhilipp Reisner return NO_ERROR; 1546a3f8f7dcSLars Ellenberg if (o_minor < -1 || o_minor > MINORMASK) 154795f8efd0SAndreas Gruenbacher return ERR_RESYNC_AFTER; 1548b411b363SPhilipp Reisner 1549b411b363SPhilipp Reisner /* check for loops */ 1550b30ab791SAndreas Gruenbacher odev = minor_to_device(o_minor); 1551b411b363SPhilipp Reisner while (1) { 1552b30ab791SAndreas Gruenbacher if (odev == device) 155395f8efd0SAndreas Gruenbacher return ERR_RESYNC_AFTER_CYCLE; 1554b411b363SPhilipp Reisner 1555a3f8f7dcSLars Ellenberg /* You are free to depend on diskless, non-existing, 1556a3f8f7dcSLars Ellenberg * or not yet/no longer existing minors. 1557a3f8f7dcSLars Ellenberg * We only reject dependency loops. 1558a3f8f7dcSLars Ellenberg * We cannot follow the dependency chain beyond a detached or 1559a3f8f7dcSLars Ellenberg * missing minor. 1560a3f8f7dcSLars Ellenberg */ 1561a3f8f7dcSLars Ellenberg if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS) 1562a3f8f7dcSLars Ellenberg return NO_ERROR; 1563a3f8f7dcSLars Ellenberg 1564daeda1ccSPhilipp Reisner rcu_read_lock(); 156595f8efd0SAndreas Gruenbacher resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 1566daeda1ccSPhilipp Reisner rcu_read_unlock(); 1567b411b363SPhilipp Reisner /* dependency chain ends here, no cycles. */ 156895f8efd0SAndreas Gruenbacher if (resync_after == -1) 1569b411b363SPhilipp Reisner return NO_ERROR; 1570b411b363SPhilipp Reisner 1571b411b363SPhilipp Reisner /* follow the dependency chain */ 1572b30ab791SAndreas Gruenbacher odev = minor_to_device(resync_after); 1573b411b363SPhilipp Reisner } 1574b411b363SPhilipp Reisner } 1575b411b363SPhilipp Reisner 1576dc97b708SPhilipp Reisner /* caller must hold global_state_lock */ 1577b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device) 1578b411b363SPhilipp Reisner { 1579b411b363SPhilipp Reisner int changes; 1580b411b363SPhilipp Reisner 1581b411b363SPhilipp Reisner do { 1582b30ab791SAndreas Gruenbacher changes = _drbd_pause_after(device); 1583b30ab791SAndreas Gruenbacher changes |= _drbd_resume_next(device); 1584b411b363SPhilipp Reisner } while (changes); 1585b411b363SPhilipp Reisner } 1586b411b363SPhilipp Reisner 1587b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device) 15889bd28d3cSLars Ellenberg { 1589813472ceSPhilipp Reisner struct fifo_buffer *plan; 1590813472ceSPhilipp Reisner 1591b30ab791SAndreas Gruenbacher atomic_set(&device->rs_sect_in, 0); 1592b30ab791SAndreas Gruenbacher atomic_set(&device->rs_sect_ev, 0); 1593b30ab791SAndreas Gruenbacher device->rs_in_flight = 0; 1594813472ceSPhilipp Reisner 1595813472ceSPhilipp Reisner /* Updating the RCU protected object in place is necessary since 1596813472ceSPhilipp Reisner this function gets called from atomic context. 1597813472ceSPhilipp Reisner It is valid since all other updates also lead to an completely 1598813472ceSPhilipp Reisner empty fifo */ 1599813472ceSPhilipp Reisner rcu_read_lock(); 1600b30ab791SAndreas Gruenbacher plan = rcu_dereference(device->rs_plan_s); 1601813472ceSPhilipp Reisner plan->total = 0; 1602813472ceSPhilipp Reisner fifo_set(plan, 0); 1603813472ceSPhilipp Reisner rcu_read_unlock(); 16049bd28d3cSLars Ellenberg } 16059bd28d3cSLars Ellenberg 16061f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data) 16071f04af33SPhilipp Reisner { 1608b30ab791SAndreas Gruenbacher struct drbd_device *device = (struct drbd_device *) data; 16091f04af33SPhilipp Reisner 161084b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 161184b8c06bSAndreas Gruenbacher &device->start_resync_work); 16121f04af33SPhilipp Reisner } 16131f04af33SPhilipp Reisner 161499920dc5SAndreas Gruenbacher int w_start_resync(struct drbd_work *w, int cancel) 16151f04af33SPhilipp Reisner { 161684b8c06bSAndreas Gruenbacher struct drbd_device *device = 161784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, start_resync_work); 161800d56944SPhilipp Reisner 1619b30ab791SAndreas Gruenbacher if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { 1620d0180171SAndreas Gruenbacher drbd_warn(device, "w_start_resync later...\n"); 1621b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ/10; 1622b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 162399920dc5SAndreas Gruenbacher return 0; 16241f04af33SPhilipp Reisner } 16251f04af33SPhilipp Reisner 1626b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_SOURCE); 1627b30ab791SAndreas Gruenbacher clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); 162899920dc5SAndreas Gruenbacher return 0; 16291f04af33SPhilipp Reisner } 16301f04af33SPhilipp Reisner 1631b411b363SPhilipp Reisner /** 1632b411b363SPhilipp Reisner * drbd_start_resync() - Start the resync process 1633b30ab791SAndreas Gruenbacher * @device: DRBD device. 1634b411b363SPhilipp Reisner * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET 1635b411b363SPhilipp Reisner * 1636b411b363SPhilipp Reisner * This function might bring you directly into one of the 1637b411b363SPhilipp Reisner * C_PAUSED_SYNC_* states. 1638b411b363SPhilipp Reisner */ 1639b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) 1640b411b363SPhilipp Reisner { 164144a4d551SLars Ellenberg struct drbd_peer_device *peer_device = first_peer_device(device); 164244a4d551SLars Ellenberg struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; 1643b411b363SPhilipp Reisner union drbd_state ns; 1644b411b363SPhilipp Reisner int r; 1645b411b363SPhilipp Reisner 1646b30ab791SAndreas Gruenbacher if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { 1647d0180171SAndreas Gruenbacher drbd_err(device, "Resync already running!\n"); 1648b411b363SPhilipp Reisner return; 1649b411b363SPhilipp Reisner } 1650b411b363SPhilipp Reisner 1651b30ab791SAndreas Gruenbacher if (!test_bit(B_RS_H_DONE, &device->flags)) { 1652b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) { 1653b411b363SPhilipp Reisner /* Since application IO was locked out during C_WF_BITMAP_T and 1654b411b363SPhilipp Reisner C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 1655b411b363SPhilipp Reisner we check that we might make the data inconsistent. */ 1656b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-target"); 1657b411b363SPhilipp Reisner r = (r >> 8) & 0xff; 1658b411b363SPhilipp Reisner if (r > 0) { 1659d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-target handler returned %d, " 1660b411b363SPhilipp Reisner "dropping connection.\n", r); 166144a4d551SLars Ellenberg conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 1662b411b363SPhilipp Reisner return; 1663b411b363SPhilipp Reisner } 166409b9e797SPhilipp Reisner } else /* C_SYNC_SOURCE */ { 1665b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-source"); 166609b9e797SPhilipp Reisner r = (r >> 8) & 0xff; 166709b9e797SPhilipp Reisner if (r > 0) { 166809b9e797SPhilipp Reisner if (r == 3) { 1669d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 167009b9e797SPhilipp Reisner "ignoring. Old userland tools?", r); 167109b9e797SPhilipp Reisner } else { 1672d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 167309b9e797SPhilipp Reisner "dropping connection.\n", r); 167444a4d551SLars Ellenberg conn_request_state(connection, 1675a6b32bc3SAndreas Gruenbacher NS(conn, C_DISCONNECTING), CS_HARD); 167609b9e797SPhilipp Reisner return; 167709b9e797SPhilipp Reisner } 167809b9e797SPhilipp Reisner } 1679b411b363SPhilipp Reisner } 1680e64a3294SPhilipp Reisner } 1681b411b363SPhilipp Reisner 168244a4d551SLars Ellenberg if (current == connection->worker.task) { 1683dad20554SPhilipp Reisner /* The worker should not sleep waiting for state_mutex, 1684e64a3294SPhilipp Reisner that can take long */ 1685b30ab791SAndreas Gruenbacher if (!mutex_trylock(device->state_mutex)) { 1686b30ab791SAndreas Gruenbacher set_bit(B_RS_H_DONE, &device->flags); 1687b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ/5; 1688b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 1689e64a3294SPhilipp Reisner return; 1690e64a3294SPhilipp Reisner } 1691e64a3294SPhilipp Reisner } else { 1692b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 1693e64a3294SPhilipp Reisner } 1694b30ab791SAndreas Gruenbacher clear_bit(B_RS_H_DONE, &device->flags); 1695b411b363SPhilipp Reisner 1696074f4afeSLars Ellenberg /* req_lock: serialize with drbd_send_and_submit() and others 1697074f4afeSLars Ellenberg * global_state_lock: for stable sync-after dependencies */ 1698074f4afeSLars Ellenberg spin_lock_irq(&device->resource->req_lock); 1699074f4afeSLars Ellenberg write_lock(&global_state_lock); 1700a700471bSPhilipp Reisner /* Did some connection breakage or IO error race with us? */ 1701b30ab791SAndreas Gruenbacher if (device->state.conn < C_CONNECTED 1702b30ab791SAndreas Gruenbacher || !get_ldev_if_state(device, D_NEGOTIATING)) { 1703074f4afeSLars Ellenberg write_unlock(&global_state_lock); 1704074f4afeSLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 1705b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 1706b411b363SPhilipp Reisner return; 1707b411b363SPhilipp Reisner } 1708b411b363SPhilipp Reisner 1709b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1710b411b363SPhilipp Reisner 1711b30ab791SAndreas Gruenbacher ns.aftr_isp = !_drbd_may_sync_now(device); 1712b411b363SPhilipp Reisner 1713b411b363SPhilipp Reisner ns.conn = side; 1714b411b363SPhilipp Reisner 1715b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) 1716b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 1717b411b363SPhilipp Reisner else /* side == C_SYNC_SOURCE */ 1718b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 1719b411b363SPhilipp Reisner 1720b30ab791SAndreas Gruenbacher r = __drbd_set_state(device, ns, CS_VERBOSE, NULL); 1721b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1722b411b363SPhilipp Reisner 1723b411b363SPhilipp Reisner if (ns.conn < C_CONNECTED) 1724b411b363SPhilipp Reisner r = SS_UNKNOWN_ERROR; 1725b411b363SPhilipp Reisner 1726b411b363SPhilipp Reisner if (r == SS_SUCCESS) { 1727b30ab791SAndreas Gruenbacher unsigned long tw = drbd_bm_total_weight(device); 17281d7734a0SLars Ellenberg unsigned long now = jiffies; 17291d7734a0SLars Ellenberg int i; 17301d7734a0SLars Ellenberg 1731b30ab791SAndreas Gruenbacher device->rs_failed = 0; 1732b30ab791SAndreas Gruenbacher device->rs_paused = 0; 1733b30ab791SAndreas Gruenbacher device->rs_same_csum = 0; 1734b30ab791SAndreas Gruenbacher device->rs_last_events = 0; 1735b30ab791SAndreas Gruenbacher device->rs_last_sect_ev = 0; 1736b30ab791SAndreas Gruenbacher device->rs_total = tw; 1737b30ab791SAndreas Gruenbacher device->rs_start = now; 17381d7734a0SLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 1739b30ab791SAndreas Gruenbacher device->rs_mark_left[i] = tw; 1740b30ab791SAndreas Gruenbacher device->rs_mark_time[i] = now; 17411d7734a0SLars Ellenberg } 1742b30ab791SAndreas Gruenbacher _drbd_pause_after(device); 17435ab7d2c0SLars Ellenberg /* Forget potentially stale cached per resync extent bit-counts. 17445ab7d2c0SLars Ellenberg * Open coded drbd_rs_cancel_all(device), we already have IRQs 17455ab7d2c0SLars Ellenberg * disabled, and know the disk state is ok. */ 17465ab7d2c0SLars Ellenberg spin_lock(&device->al_lock); 17475ab7d2c0SLars Ellenberg lc_reset(device->resync); 17485ab7d2c0SLars Ellenberg device->resync_locked = 0; 17495ab7d2c0SLars Ellenberg device->resync_wenr = LC_FREE; 17505ab7d2c0SLars Ellenberg spin_unlock(&device->al_lock); 1751b411b363SPhilipp Reisner } 1752074f4afeSLars Ellenberg write_unlock(&global_state_lock); 1753074f4afeSLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 17545a22db89SLars Ellenberg 17556c922ed5SLars Ellenberg if (r == SS_SUCCESS) { 17565ab7d2c0SLars Ellenberg wake_up(&device->al_wait); /* for lc_reset() above */ 1757328e0f12SPhilipp Reisner /* reset rs_last_bcast when a resync or verify is started, 1758328e0f12SPhilipp Reisner * to deal with potential jiffies wrap. */ 1759b30ab791SAndreas Gruenbacher device->rs_last_bcast = jiffies - HZ; 1760328e0f12SPhilipp Reisner 1761d0180171SAndreas Gruenbacher drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 17626c922ed5SLars Ellenberg drbd_conn_str(ns.conn), 1763b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), 1764b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total); 17655a22db89SLars Ellenberg if (side == C_SYNC_TARGET) 1766b30ab791SAndreas Gruenbacher device->bm_resync_fo = 0; 17675a22db89SLars Ellenberg 17685a22db89SLars Ellenberg /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 17695a22db89SLars Ellenberg * with w_send_oos, or the sync target will get confused as to 17705a22db89SLars Ellenberg * how much bits to resync. We cannot do that always, because for an 17715a22db89SLars Ellenberg * empty resync and protocol < 95, we need to do it here, as we call 17725a22db89SLars Ellenberg * drbd_resync_finished from here in that case. 17735a22db89SLars Ellenberg * We drbd_gen_and_send_sync_uuid here for protocol < 96, 17745a22db89SLars Ellenberg * and from after_state_ch otherwise. */ 177544a4d551SLars Ellenberg if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) 177644a4d551SLars Ellenberg drbd_gen_and_send_sync_uuid(peer_device); 1777b411b363SPhilipp Reisner 177844a4d551SLars Ellenberg if (connection->agreed_pro_version < 95 && device->rs_total == 0) { 1779af85e8e8SLars Ellenberg /* This still has a race (about when exactly the peers 1780af85e8e8SLars Ellenberg * detect connection loss) that can lead to a full sync 1781af85e8e8SLars Ellenberg * on next handshake. In 8.3.9 we fixed this with explicit 1782af85e8e8SLars Ellenberg * resync-finished notifications, but the fix 1783af85e8e8SLars Ellenberg * introduces a protocol change. Sleeping for some 1784af85e8e8SLars Ellenberg * time longer than the ping interval + timeout on the 1785af85e8e8SLars Ellenberg * SyncSource, to give the SyncTarget the chance to 1786af85e8e8SLars Ellenberg * detect connection loss, then waiting for a ping 1787af85e8e8SLars Ellenberg * response (implicit in drbd_resync_finished) reduces 1788af85e8e8SLars Ellenberg * the race considerably, but does not solve it. */ 178944ed167dSPhilipp Reisner if (side == C_SYNC_SOURCE) { 179044ed167dSPhilipp Reisner struct net_conf *nc; 179144ed167dSPhilipp Reisner int timeo; 179244ed167dSPhilipp Reisner 179344ed167dSPhilipp Reisner rcu_read_lock(); 179444a4d551SLars Ellenberg nc = rcu_dereference(connection->net_conf); 179544ed167dSPhilipp Reisner timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; 179644ed167dSPhilipp Reisner rcu_read_unlock(); 179744ed167dSPhilipp Reisner schedule_timeout_interruptible(timeo); 179844ed167dSPhilipp Reisner } 1799b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1800b411b363SPhilipp Reisner } 1801b411b363SPhilipp Reisner 1802b30ab791SAndreas Gruenbacher drbd_rs_controller_reset(device); 1803b30ab791SAndreas Gruenbacher /* ns.conn may already be != device->state.conn, 1804b411b363SPhilipp Reisner * we may have been paused in between, or become paused until 1805b411b363SPhilipp Reisner * the timer triggers. 1806b411b363SPhilipp Reisner * No matter, that is handled in resync_timer_fn() */ 1807b411b363SPhilipp Reisner if (ns.conn == C_SYNC_TARGET) 1808b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies); 1809b411b363SPhilipp Reisner 1810b30ab791SAndreas Gruenbacher drbd_md_sync(device); 1811b411b363SPhilipp Reisner } 1812b30ab791SAndreas Gruenbacher put_ldev(device); 1813b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 1814b411b363SPhilipp Reisner } 1815b411b363SPhilipp Reisner 1816c7a58db4SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device) 1817c7a58db4SLars Ellenberg { 1818c7a58db4SLars Ellenberg struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; 18195ab7d2c0SLars Ellenberg bool resync_done = test_and_clear_bit(RS_DONE, &device->flags); 1820c7a58db4SLars Ellenberg device->rs_last_bcast = jiffies; 1821c7a58db4SLars Ellenberg 1822c7a58db4SLars Ellenberg if (!get_ldev(device)) 1823c7a58db4SLars Ellenberg return; 1824c7a58db4SLars Ellenberg 1825c7a58db4SLars Ellenberg drbd_bm_write_lazy(device, 0); 18265ab7d2c0SLars Ellenberg if (resync_done && is_sync_state(device->state.conn)) 1827c7a58db4SLars Ellenberg drbd_resync_finished(device); 18285ab7d2c0SLars Ellenberg 1829c7a58db4SLars Ellenberg drbd_bcast_event(device, &sib); 1830c7a58db4SLars Ellenberg /* update timestamp, in case it took a while to write out stuff */ 1831c7a58db4SLars Ellenberg device->rs_last_bcast = jiffies; 1832c7a58db4SLars Ellenberg put_ldev(device); 1833c7a58db4SLars Ellenberg } 1834c7a58db4SLars Ellenberg 1835c7a58db4SLars Ellenberg static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) 1836c7a58db4SLars Ellenberg { 1837c7a58db4SLars Ellenberg struct drbd_peer_device *peer_device; 1838c7a58db4SLars Ellenberg int vnr; 1839c7a58db4SLars Ellenberg 1840c7a58db4SLars Ellenberg rcu_read_lock(); 1841c7a58db4SLars Ellenberg idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1842c7a58db4SLars Ellenberg struct drbd_device *device = peer_device->device; 18435ab7d2c0SLars Ellenberg if (!test_and_clear_bit(RS_PROGRESS, &device->flags)) 1844c7a58db4SLars Ellenberg continue; 18455ab7d2c0SLars Ellenberg 1846c7a58db4SLars Ellenberg kref_get(&device->kref); 1847c7a58db4SLars Ellenberg rcu_read_unlock(); 1848c7a58db4SLars Ellenberg update_on_disk_bitmap(device); 1849c7a58db4SLars Ellenberg kref_put(&device->kref, drbd_destroy_device); 1850c7a58db4SLars Ellenberg rcu_read_lock(); 1851c7a58db4SLars Ellenberg } 1852c7a58db4SLars Ellenberg rcu_read_unlock(); 1853c7a58db4SLars Ellenberg } 1854c7a58db4SLars Ellenberg 1855a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) 18568c0785a5SLars Ellenberg { 18578c0785a5SLars Ellenberg spin_lock_irq(&queue->q_lock); 18588c0785a5SLars Ellenberg list_splice_init(&queue->q, work_list); 18598c0785a5SLars Ellenberg spin_unlock_irq(&queue->q_lock); 18608c0785a5SLars Ellenberg return !list_empty(work_list); 18618c0785a5SLars Ellenberg } 18628c0785a5SLars Ellenberg 1863a186e478SRashika Kheria static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list) 18648c0785a5SLars Ellenberg { 18658c0785a5SLars Ellenberg spin_lock_irq(&queue->q_lock); 18668c0785a5SLars Ellenberg if (!list_empty(&queue->q)) 18678c0785a5SLars Ellenberg list_move(queue->q.next, work_list); 18688c0785a5SLars Ellenberg spin_unlock_irq(&queue->q_lock); 18698c0785a5SLars Ellenberg return !list_empty(work_list); 18708c0785a5SLars Ellenberg } 18718c0785a5SLars Ellenberg 1872bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) 1873b6dd1a89SLars Ellenberg { 1874b6dd1a89SLars Ellenberg DEFINE_WAIT(wait); 1875b6dd1a89SLars Ellenberg struct net_conf *nc; 1876b6dd1a89SLars Ellenberg int uncork, cork; 1877b6dd1a89SLars Ellenberg 1878b6dd1a89SLars Ellenberg dequeue_work_item(&connection->sender_work, work_list); 1879b6dd1a89SLars Ellenberg if (!list_empty(work_list)) 1880b6dd1a89SLars Ellenberg return; 1881b6dd1a89SLars Ellenberg 1882b6dd1a89SLars Ellenberg /* Still nothing to do? 1883b6dd1a89SLars Ellenberg * Maybe we still need to close the current epoch, 1884b6dd1a89SLars Ellenberg * even if no new requests are queued yet. 1885b6dd1a89SLars Ellenberg * 1886b6dd1a89SLars Ellenberg * Also, poke TCP, just in case. 1887b6dd1a89SLars Ellenberg * Then wait for new work (or signal). */ 1888b6dd1a89SLars Ellenberg rcu_read_lock(); 1889b6dd1a89SLars Ellenberg nc = rcu_dereference(connection->net_conf); 1890b6dd1a89SLars Ellenberg uncork = nc ? nc->tcp_cork : 0; 1891b6dd1a89SLars Ellenberg rcu_read_unlock(); 1892b6dd1a89SLars Ellenberg if (uncork) { 1893b6dd1a89SLars Ellenberg mutex_lock(&connection->data.mutex); 1894b6dd1a89SLars Ellenberg if (connection->data.socket) 1895b6dd1a89SLars Ellenberg drbd_tcp_uncork(connection->data.socket); 1896b6dd1a89SLars Ellenberg mutex_unlock(&connection->data.mutex); 1897b6dd1a89SLars Ellenberg } 1898b6dd1a89SLars Ellenberg 1899b6dd1a89SLars Ellenberg for (;;) { 1900b6dd1a89SLars Ellenberg int send_barrier; 1901b6dd1a89SLars Ellenberg prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); 19020500813fSAndreas Gruenbacher spin_lock_irq(&connection->resource->req_lock); 1903b6dd1a89SLars Ellenberg spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 1904bc317a9eSLars Ellenberg /* dequeue single item only, 1905bc317a9eSLars Ellenberg * we still use drbd_queue_work_front() in some places */ 1906bc317a9eSLars Ellenberg if (!list_empty(&connection->sender_work.q)) 1907bc317a9eSLars Ellenberg list_move(connection->sender_work.q.next, work_list); 1908b6dd1a89SLars Ellenberg spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 1909b6dd1a89SLars Ellenberg if (!list_empty(work_list) || signal_pending(current)) { 19100500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 1911b6dd1a89SLars Ellenberg break; 1912b6dd1a89SLars Ellenberg } 1913f9c78128SLars Ellenberg 1914f9c78128SLars Ellenberg /* We found nothing new to do, no to-be-communicated request, 1915f9c78128SLars Ellenberg * no other work item. We may still need to close the last 1916f9c78128SLars Ellenberg * epoch. Next incoming request epoch will be connection -> 1917f9c78128SLars Ellenberg * current transfer log epoch number. If that is different 1918f9c78128SLars Ellenberg * from the epoch of the last request we communicated, it is 1919f9c78128SLars Ellenberg * safe to send the epoch separating barrier now. 1920f9c78128SLars Ellenberg */ 1921f9c78128SLars Ellenberg send_barrier = 1922f9c78128SLars Ellenberg atomic_read(&connection->current_tle_nr) != 1923f9c78128SLars Ellenberg connection->send.current_epoch_nr; 19240500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 1925f9c78128SLars Ellenberg 1926f9c78128SLars Ellenberg if (send_barrier) 1927f9c78128SLars Ellenberg maybe_send_barrier(connection, 1928f9c78128SLars Ellenberg connection->send.current_epoch_nr + 1); 19295ab7d2c0SLars Ellenberg 19305ab7d2c0SLars Ellenberg if (test_bit(CONN_RS_PROGRESS, &connection->flags)) 19315ab7d2c0SLars Ellenberg break; 19325ab7d2c0SLars Ellenberg 1933a80ca1aeSLars Ellenberg /* drbd_send() may have called flush_signals() */ 1934a80ca1aeSLars Ellenberg if (get_t_state(&connection->worker) != RUNNING) 1935a80ca1aeSLars Ellenberg break; 19365ab7d2c0SLars Ellenberg 1937b6dd1a89SLars Ellenberg schedule(); 1938b6dd1a89SLars Ellenberg /* may be woken up for other things but new work, too, 1939b6dd1a89SLars Ellenberg * e.g. if the current epoch got closed. 1940b6dd1a89SLars Ellenberg * In which case we send the barrier above. */ 1941b6dd1a89SLars Ellenberg } 1942b6dd1a89SLars Ellenberg finish_wait(&connection->sender_work.q_wait, &wait); 1943b6dd1a89SLars Ellenberg 1944b6dd1a89SLars Ellenberg /* someone may have changed the config while we have been waiting above. */ 1945b6dd1a89SLars Ellenberg rcu_read_lock(); 1946b6dd1a89SLars Ellenberg nc = rcu_dereference(connection->net_conf); 1947b6dd1a89SLars Ellenberg cork = nc ? nc->tcp_cork : 0; 1948b6dd1a89SLars Ellenberg rcu_read_unlock(); 1949b6dd1a89SLars Ellenberg mutex_lock(&connection->data.mutex); 1950b6dd1a89SLars Ellenberg if (connection->data.socket) { 1951b6dd1a89SLars Ellenberg if (cork) 1952b6dd1a89SLars Ellenberg drbd_tcp_cork(connection->data.socket); 1953b6dd1a89SLars Ellenberg else if (!uncork) 1954b6dd1a89SLars Ellenberg drbd_tcp_uncork(connection->data.socket); 1955b6dd1a89SLars Ellenberg } 1956b6dd1a89SLars Ellenberg mutex_unlock(&connection->data.mutex); 1957b6dd1a89SLars Ellenberg } 1958b6dd1a89SLars Ellenberg 1959b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi) 1960b411b363SPhilipp Reisner { 1961bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 19626db7e50aSAndreas Gruenbacher struct drbd_work *w = NULL; 1963c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 1964b411b363SPhilipp Reisner LIST_HEAD(work_list); 19658c0785a5SLars Ellenberg int vnr; 1966b411b363SPhilipp Reisner 1967e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 196880822284SPhilipp Reisner drbd_thread_current_set_cpu(thi); 1969b411b363SPhilipp Reisner 19708c0785a5SLars Ellenberg /* as long as we use drbd_queue_work_front(), 19718c0785a5SLars Ellenberg * we may only dequeue single work items here, not batches. */ 19728c0785a5SLars Ellenberg if (list_empty(&work_list)) 1973bde89a9eSAndreas Gruenbacher wait_for_work(connection, &work_list); 1974b411b363SPhilipp Reisner 19755ab7d2c0SLars Ellenberg if (test_and_clear_bit(CONN_RS_PROGRESS, &connection->flags)) 19765ab7d2c0SLars Ellenberg try_update_all_on_disk_bitmaps(connection); 19775ab7d2c0SLars Ellenberg 19788c0785a5SLars Ellenberg if (signal_pending(current)) { 1979b411b363SPhilipp Reisner flush_signals(current); 198019393e10SPhilipp Reisner if (get_t_state(thi) == RUNNING) { 19811ec861ebSAndreas Gruenbacher drbd_warn(connection, "Worker got an unexpected signal\n"); 1982b411b363SPhilipp Reisner continue; 198319393e10SPhilipp Reisner } 1984b411b363SPhilipp Reisner break; 1985b411b363SPhilipp Reisner } 1986b411b363SPhilipp Reisner 1987e77a0a5cSAndreas Gruenbacher if (get_t_state(thi) != RUNNING) 1988b411b363SPhilipp Reisner break; 1989b411b363SPhilipp Reisner 19908c0785a5SLars Ellenberg while (!list_empty(&work_list)) { 19916db7e50aSAndreas Gruenbacher w = list_first_entry(&work_list, struct drbd_work, list); 19926db7e50aSAndreas Gruenbacher list_del_init(&w->list); 19936db7e50aSAndreas Gruenbacher if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) 19948c0785a5SLars Ellenberg continue; 1995bde89a9eSAndreas Gruenbacher if (connection->cstate >= C_WF_REPORT_PARAMS) 1996bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 1997b411b363SPhilipp Reisner } 1998b411b363SPhilipp Reisner } 1999b411b363SPhilipp Reisner 20008c0785a5SLars Ellenberg do { 2001b411b363SPhilipp Reisner while (!list_empty(&work_list)) { 20026db7e50aSAndreas Gruenbacher w = list_first_entry(&work_list, struct drbd_work, list); 20036db7e50aSAndreas Gruenbacher list_del_init(&w->list); 20046db7e50aSAndreas Gruenbacher w->cb(w, 1); 2005b411b363SPhilipp Reisner } 2006bde89a9eSAndreas Gruenbacher dequeue_work_batch(&connection->sender_work, &work_list); 20078c0785a5SLars Ellenberg } while (!list_empty(&work_list)); 2008b411b363SPhilipp Reisner 2009c141ebdaSPhilipp Reisner rcu_read_lock(); 2010c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2011c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 20120b0ba1efSAndreas Gruenbacher D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE); 2013b30ab791SAndreas Gruenbacher kref_get(&device->kref); 2014c141ebdaSPhilipp Reisner rcu_read_unlock(); 2015b30ab791SAndreas Gruenbacher drbd_device_cleanup(device); 201605a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 2017c141ebdaSPhilipp Reisner rcu_read_lock(); 20180e29d163SPhilipp Reisner } 2019c141ebdaSPhilipp Reisner rcu_read_unlock(); 2020b411b363SPhilipp Reisner 2021b411b363SPhilipp Reisner return 0; 2022b411b363SPhilipp Reisner } 2023