1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_worker.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner 24b411b363SPhilipp Reisner */ 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner #include <linux/drbd.h> 28b411b363SPhilipp Reisner #include <linux/sched.h> 29b411b363SPhilipp Reisner #include <linux/wait.h> 30b411b363SPhilipp Reisner #include <linux/mm.h> 31b411b363SPhilipp Reisner #include <linux/memcontrol.h> 32b411b363SPhilipp Reisner #include <linux/mm_inline.h> 33b411b363SPhilipp Reisner #include <linux/slab.h> 34b411b363SPhilipp Reisner #include <linux/random.h> 35b411b363SPhilipp Reisner #include <linux/string.h> 36b411b363SPhilipp Reisner #include <linux/scatterlist.h> 37b411b363SPhilipp Reisner 38b411b363SPhilipp Reisner #include "drbd_int.h" 39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 40b411b363SPhilipp Reisner #include "drbd_req.h" 41b411b363SPhilipp Reisner 42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int); 43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int); 44b411b363SPhilipp Reisner 45c5a91619SAndreas Gruenbacher /* endio handlers: 46c5a91619SAndreas Gruenbacher * drbd_md_io_complete (defined here) 47fcefa62eSAndreas Gruenbacher * drbd_request_endio (defined here) 48fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio (defined here) 49c5a91619SAndreas Gruenbacher * bm_async_io_complete (defined in drbd_bitmap.c) 50c5a91619SAndreas Gruenbacher * 51b411b363SPhilipp Reisner * For all these callbacks, note the following: 52b411b363SPhilipp Reisner * The callbacks will be called in irq context by the IDE drivers, 53b411b363SPhilipp Reisner * and in Softirqs/Tasklets/BH context by the SCSI drivers. 
54b411b363SPhilipp Reisner * Try to get the locking right :) 55b411b363SPhilipp Reisner * 56b411b363SPhilipp Reisner */ 57b411b363SPhilipp Reisner 58b411b363SPhilipp Reisner 59b411b363SPhilipp Reisner /* About the global_state_lock 60b411b363SPhilipp Reisner Each state transition on an device holds a read lock. In case we have 6195f8efd0SAndreas Gruenbacher to evaluate the resync after dependencies, we grab a write lock, because 62b411b363SPhilipp Reisner we need stable states on all devices for that. */ 63b411b363SPhilipp Reisner rwlock_t global_state_lock; 64b411b363SPhilipp Reisner 65b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO 66b411b363SPhilipp Reisner * submitted by drbd_md_sync_page_io() 67b411b363SPhilipp Reisner */ 68b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error) 69b411b363SPhilipp Reisner { 70b411b363SPhilipp Reisner struct drbd_md_io *md_io; 71b30ab791SAndreas Gruenbacher struct drbd_device *device; 72b411b363SPhilipp Reisner 73b411b363SPhilipp Reisner md_io = (struct drbd_md_io *)bio->bi_private; 74b30ab791SAndreas Gruenbacher device = container_of(md_io, struct drbd_device, md_io); 75cdfda633SPhilipp Reisner 76b411b363SPhilipp Reisner md_io->error = error; 77b411b363SPhilipp Reisner 780cfac5ddSPhilipp Reisner /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 790cfac5ddSPhilipp Reisner * to timeout on the lower level device, and eventually detach from it. 800cfac5ddSPhilipp Reisner * If this io completion runs after that timeout expired, this 810cfac5ddSPhilipp Reisner * drbd_md_put_buffer() may allow us to finally try and re-attach. 820cfac5ddSPhilipp Reisner * During normal operation, this only puts that extra reference 830cfac5ddSPhilipp Reisner * down to 1 again. 
840cfac5ddSPhilipp Reisner * Make sure we first drop the reference, and only then signal 850cfac5ddSPhilipp Reisner * completion, or we may (in drbd_al_read_log()) cycle so fast into the 860cfac5ddSPhilipp Reisner * next drbd_md_sync_page_io(), that we trigger the 87b30ab791SAndreas Gruenbacher * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 880cfac5ddSPhilipp Reisner */ 89b30ab791SAndreas Gruenbacher drbd_md_put_buffer(device); 90cdfda633SPhilipp Reisner md_io->done = 1; 91b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 92cdfda633SPhilipp Reisner bio_put(bio); 93b30ab791SAndreas Gruenbacher if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ 94b30ab791SAndreas Gruenbacher put_ldev(device); 95b411b363SPhilipp Reisner } 96b411b363SPhilipp Reisner 97b411b363SPhilipp Reisner /* reads on behalf of the partner, 98b411b363SPhilipp Reisner * "submitted" by the receiver 99b411b363SPhilipp Reisner */ 100a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) 101b411b363SPhilipp Reisner { 102b411b363SPhilipp Reisner unsigned long flags = 0; 1036780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1046780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 105b411b363SPhilipp Reisner 1060500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 107b30ab791SAndreas Gruenbacher device->read_cnt += peer_req->i.size >> 9; 108a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 109b30ab791SAndreas Gruenbacher if (list_empty(&device->read_ee)) 110b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 111db830c46SAndreas Gruenbacher if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) 112b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_READ_ERROR); 1130500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 114b411b363SPhilipp Reisner 
1156780139cSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); 116b30ab791SAndreas Gruenbacher put_ldev(device); 117b411b363SPhilipp Reisner } 118b411b363SPhilipp Reisner 119b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes, 12045bb912bSLars Ellenberg * "submitted" by the receiver, final stage. */ 121a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) 122b411b363SPhilipp Reisner { 123b411b363SPhilipp Reisner unsigned long flags = 0; 1246780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1256780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 126181286adSLars Ellenberg struct drbd_interval i; 127b411b363SPhilipp Reisner int do_wake; 128579b57edSAndreas Gruenbacher u64 block_id; 129b411b363SPhilipp Reisner int do_al_complete_io; 130b411b363SPhilipp Reisner 131db830c46SAndreas Gruenbacher /* after we moved peer_req to done_ee, 132b411b363SPhilipp Reisner * we may no longer access it, 133b411b363SPhilipp Reisner * it may be freed/reused already! 134b411b363SPhilipp Reisner * (as soon as we release the req_lock) */ 135181286adSLars Ellenberg i = peer_req->i; 136db830c46SAndreas Gruenbacher do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 137db830c46SAndreas Gruenbacher block_id = peer_req->block_id; 138b411b363SPhilipp Reisner 1390500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 140b30ab791SAndreas Gruenbacher device->writ_cnt += peer_req->i.size >> 9; 141a8cd15baSAndreas Gruenbacher list_move_tail(&peer_req->w.list, &device->done_ee); 142b411b363SPhilipp Reisner 143bb3bfe96SAndreas Gruenbacher /* 1445e472264SAndreas Gruenbacher * Do not remove from the write_requests tree here: we did not send the 145bb3bfe96SAndreas Gruenbacher * Ack yet and did not wake possibly waiting conflicting requests. 
146bb3bfe96SAndreas Gruenbacher * Removed from the tree from "drbd_process_done_ee" within the 14784b8c06bSAndreas Gruenbacher * appropriate dw.cb (e_end_block/e_end_resync_block) or from 148bb3bfe96SAndreas Gruenbacher * _drbd_clear_done_ee. 149bb3bfe96SAndreas Gruenbacher */ 150b411b363SPhilipp Reisner 151b30ab791SAndreas Gruenbacher do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 152b411b363SPhilipp Reisner 153a0fb3c47SLars Ellenberg /* FIXME do we want to detach for failed REQ_DISCARD? 154a0fb3c47SLars Ellenberg * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 155a0fb3c47SLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) 156b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 1570500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 158b411b363SPhilipp Reisner 159579b57edSAndreas Gruenbacher if (block_id == ID_SYNCER) 160b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, i.sector); 161b411b363SPhilipp Reisner 162b411b363SPhilipp Reisner if (do_wake) 163b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 164b411b363SPhilipp Reisner 165b411b363SPhilipp Reisner if (do_al_complete_io) 166b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &i); 167b411b363SPhilipp Reisner 1686780139cSAndreas Gruenbacher wake_asender(peer_device->connection); 169b30ab791SAndreas Gruenbacher put_ldev(device); 17045bb912bSLars Ellenberg } 171b411b363SPhilipp Reisner 17245bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes, 17345bb912bSLars Ellenberg * "submitted" by the receiver. 
17445bb912bSLars Ellenberg */ 175fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error) 17645bb912bSLars Ellenberg { 177db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req = bio->bi_private; 178a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_req->peer_device->device; 17945bb912bSLars Ellenberg int uptodate = bio_flagged(bio, BIO_UPTODATE); 18045bb912bSLars Ellenberg int is_write = bio_data_dir(bio) == WRITE; 181a0fb3c47SLars Ellenberg int is_discard = !!(bio->bi_rw & REQ_DISCARD); 18245bb912bSLars Ellenberg 18307194272SLars Ellenberg if (error && __ratelimit(&drbd_ratelimit_state)) 184d0180171SAndreas Gruenbacher drbd_warn(device, "%s: error=%d s=%llus\n", 185a0fb3c47SLars Ellenberg is_write ? (is_discard ? "discard" : "write") 186a0fb3c47SLars Ellenberg : "read", error, 187db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 18845bb912bSLars Ellenberg if (!error && !uptodate) { 18907194272SLars Ellenberg if (__ratelimit(&drbd_ratelimit_state)) 190d0180171SAndreas Gruenbacher drbd_warn(device, "%s: setting error to -EIO s=%llus\n", 19145bb912bSLars Ellenberg is_write ? "write" : "read", 192db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 19345bb912bSLars Ellenberg /* strange behavior of some lower level drivers... 19445bb912bSLars Ellenberg * fail the request by clearing the uptodate flag, 19545bb912bSLars Ellenberg * but do not return any error?! 
*/ 19645bb912bSLars Ellenberg error = -EIO; 19745bb912bSLars Ellenberg } 19845bb912bSLars Ellenberg 19945bb912bSLars Ellenberg if (error) 200db830c46SAndreas Gruenbacher set_bit(__EE_WAS_ERROR, &peer_req->flags); 20145bb912bSLars Ellenberg 20245bb912bSLars Ellenberg bio_put(bio); /* no need for the bio anymore */ 203db830c46SAndreas Gruenbacher if (atomic_dec_and_test(&peer_req->pending_bios)) { 20445bb912bSLars Ellenberg if (is_write) 205db830c46SAndreas Gruenbacher drbd_endio_write_sec_final(peer_req); 20645bb912bSLars Ellenberg else 207db830c46SAndreas Gruenbacher drbd_endio_read_sec_final(peer_req); 20845bb912bSLars Ellenberg } 209b411b363SPhilipp Reisner } 210b411b363SPhilipp Reisner 211b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request 212b411b363SPhilipp Reisner */ 213fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error) 214b411b363SPhilipp Reisner { 215a115413dSLars Ellenberg unsigned long flags; 216b411b363SPhilipp Reisner struct drbd_request *req = bio->bi_private; 21784b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 218a115413dSLars Ellenberg struct bio_and_error m; 219b411b363SPhilipp Reisner enum drbd_req_event what; 220b411b363SPhilipp Reisner int uptodate = bio_flagged(bio, BIO_UPTODATE); 221b411b363SPhilipp Reisner 222b411b363SPhilipp Reisner if (!error && !uptodate) { 223d0180171SAndreas Gruenbacher drbd_warn(device, "p %s: setting error to -EIO\n", 224b411b363SPhilipp Reisner bio_data_dir(bio) == WRITE ? "write" : "read"); 225b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 226b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 227b411b363SPhilipp Reisner * but do not return any error?! 
*/ 228b411b363SPhilipp Reisner error = -EIO; 229b411b363SPhilipp Reisner } 230b411b363SPhilipp Reisner 2311b6dd252SPhilipp Reisner 2321b6dd252SPhilipp Reisner /* If this request was aborted locally before, 2331b6dd252SPhilipp Reisner * but now was completed "successfully", 2341b6dd252SPhilipp Reisner * chances are that this caused arbitrary data corruption. 2351b6dd252SPhilipp Reisner * 2361b6dd252SPhilipp Reisner * "aborting" requests, or force-detaching the disk, is intended for 2371b6dd252SPhilipp Reisner * completely blocked/hung local backing devices which do no longer 2381b6dd252SPhilipp Reisner * complete requests at all, not even do error completions. In this 2391b6dd252SPhilipp Reisner * situation, usually a hard-reset and failover is the only way out. 2401b6dd252SPhilipp Reisner * 2411b6dd252SPhilipp Reisner * By "aborting", basically faking a local error-completion, 2421b6dd252SPhilipp Reisner * we allow for a more graceful swichover by cleanly migrating services. 2431b6dd252SPhilipp Reisner * Still the affected node has to be rebooted "soon". 2441b6dd252SPhilipp Reisner * 2451b6dd252SPhilipp Reisner * By completing these requests, we allow the upper layers to re-use 2461b6dd252SPhilipp Reisner * the associated data pages. 2471b6dd252SPhilipp Reisner * 2481b6dd252SPhilipp Reisner * If later the local backing device "recovers", and now DMAs some data 2491b6dd252SPhilipp Reisner * from disk into the original request pages, in the best case it will 2501b6dd252SPhilipp Reisner * just put random data into unused pages; but typically it will corrupt 2511b6dd252SPhilipp Reisner * meanwhile completely unrelated data, causing all sorts of damage. 2521b6dd252SPhilipp Reisner * 2531b6dd252SPhilipp Reisner * Which means delayed successful completion, 2541b6dd252SPhilipp Reisner * especially for READ requests, 2551b6dd252SPhilipp Reisner * is a reason to panic(). 
2561b6dd252SPhilipp Reisner * 2571b6dd252SPhilipp Reisner * We assume that a delayed *error* completion is OK, 2581b6dd252SPhilipp Reisner * though we still will complain noisily about it. 2591b6dd252SPhilipp Reisner */ 2601b6dd252SPhilipp Reisner if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { 2611b6dd252SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 262d0180171SAndreas Gruenbacher drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 2631b6dd252SPhilipp Reisner 2641b6dd252SPhilipp Reisner if (!error) 2651b6dd252SPhilipp Reisner panic("possible random memory corruption caused by delayed completion of aborted local request\n"); 2661b6dd252SPhilipp Reisner } 2671b6dd252SPhilipp Reisner 268b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 269b411b363SPhilipp Reisner if (unlikely(error)) { 2702f632aebSLars Ellenberg if (bio->bi_rw & REQ_DISCARD) 2712f632aebSLars Ellenberg what = (error == -EOPNOTSUPP) 2722f632aebSLars Ellenberg ? DISCARD_COMPLETED_NOTSUPP 2732f632aebSLars Ellenberg : DISCARD_COMPLETED_WITH_ERROR; 2742f632aebSLars Ellenberg else 275b411b363SPhilipp Reisner what = (bio_data_dir(bio) == WRITE) 2768554df1cSAndreas Gruenbacher ? WRITE_COMPLETED_WITH_ERROR 2775c3c7e64SLars Ellenberg : (bio_rw(bio) == READ) 2788554df1cSAndreas Gruenbacher ? READ_COMPLETED_WITH_ERROR 2798554df1cSAndreas Gruenbacher : READ_AHEAD_COMPLETED_WITH_ERROR; 280b411b363SPhilipp Reisner } else 2818554df1cSAndreas Gruenbacher what = COMPLETED_OK; 282b411b363SPhilipp Reisner 283b411b363SPhilipp Reisner bio_put(req->private_bio); 284b411b363SPhilipp Reisner req->private_bio = ERR_PTR(error); 285b411b363SPhilipp Reisner 286a115413dSLars Ellenberg /* not req_mod(), we need irqsave here! 
*/ 2870500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 288a115413dSLars Ellenberg __req_mod(req, what, &m); 2890500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 290b30ab791SAndreas Gruenbacher put_ldev(device); 291a115413dSLars Ellenberg 292a115413dSLars Ellenberg if (m.bio) 293b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 294b411b363SPhilipp Reisner } 295b411b363SPhilipp Reisner 29679a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest) 29745bb912bSLars Ellenberg { 29845bb912bSLars Ellenberg struct hash_desc desc; 29945bb912bSLars Ellenberg struct scatterlist sg; 300db830c46SAndreas Gruenbacher struct page *page = peer_req->pages; 30145bb912bSLars Ellenberg struct page *tmp; 30245bb912bSLars Ellenberg unsigned len; 30345bb912bSLars Ellenberg 30445bb912bSLars Ellenberg desc.tfm = tfm; 30545bb912bSLars Ellenberg desc.flags = 0; 30645bb912bSLars Ellenberg 30745bb912bSLars Ellenberg sg_init_table(&sg, 1); 30845bb912bSLars Ellenberg crypto_hash_init(&desc); 30945bb912bSLars Ellenberg 31045bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) { 31145bb912bSLars Ellenberg /* all but the last page will be fully used */ 31245bb912bSLars Ellenberg sg_set_page(&sg, page, PAGE_SIZE, 0); 31345bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 31445bb912bSLars Ellenberg page = tmp; 31545bb912bSLars Ellenberg } 31645bb912bSLars Ellenberg /* and now the last, possibly only partially used page */ 317db830c46SAndreas Gruenbacher len = peer_req->i.size & (PAGE_SIZE - 1); 31845bb912bSLars Ellenberg sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); 31945bb912bSLars Ellenberg crypto_hash_update(&desc, &sg, sg.length); 32045bb912bSLars Ellenberg crypto_hash_final(&desc, digest); 32145bb912bSLars Ellenberg } 32245bb912bSLars Ellenberg 32379a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio 
*bio, void *digest) 324b411b363SPhilipp Reisner { 325b411b363SPhilipp Reisner struct hash_desc desc; 326b411b363SPhilipp Reisner struct scatterlist sg; 3277988613bSKent Overstreet struct bio_vec bvec; 3287988613bSKent Overstreet struct bvec_iter iter; 329b411b363SPhilipp Reisner 330b411b363SPhilipp Reisner desc.tfm = tfm; 331b411b363SPhilipp Reisner desc.flags = 0; 332b411b363SPhilipp Reisner 333b411b363SPhilipp Reisner sg_init_table(&sg, 1); 334b411b363SPhilipp Reisner crypto_hash_init(&desc); 335b411b363SPhilipp Reisner 3367988613bSKent Overstreet bio_for_each_segment(bvec, bio, iter) { 3377988613bSKent Overstreet sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 338b411b363SPhilipp Reisner crypto_hash_update(&desc, &sg, sg.length); 339b411b363SPhilipp Reisner } 340b411b363SPhilipp Reisner crypto_hash_final(&desc, digest); 341b411b363SPhilipp Reisner } 342b411b363SPhilipp Reisner 3439676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */ 34499920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel) 345b411b363SPhilipp Reisner { 346a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 3476780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 3486780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 349b411b363SPhilipp Reisner int digest_size; 350b411b363SPhilipp Reisner void *digest; 35199920dc5SAndreas Gruenbacher int err = 0; 352b411b363SPhilipp Reisner 35353ea4331SLars Ellenberg if (unlikely(cancel)) 35453ea4331SLars Ellenberg goto out; 355b411b363SPhilipp Reisner 3569676c760SLars Ellenberg if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) 35753ea4331SLars Ellenberg goto out; 35853ea4331SLars Ellenberg 3596780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); 360b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 
361b411b363SPhilipp Reisner if (digest) { 362db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 363db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 3646780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 3659676c760SLars Ellenberg /* Free peer_req and pages before send. 36653ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 36753ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 36853ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 369c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 370b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 371db830c46SAndreas Gruenbacher peer_req = NULL; 372b30ab791SAndreas Gruenbacher inc_rs_pending(device); 3736780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, 37453ea4331SLars Ellenberg digest, digest_size, 375b411b363SPhilipp Reisner P_CSUM_RS_REQUEST); 376b411b363SPhilipp Reisner kfree(digest); 377b411b363SPhilipp Reisner } else { 378d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc() of digest failed.\n"); 37999920dc5SAndreas Gruenbacher err = -ENOMEM; 380b411b363SPhilipp Reisner } 381b411b363SPhilipp Reisner 38253ea4331SLars Ellenberg out: 383db830c46SAndreas Gruenbacher if (peer_req) 384b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 385b411b363SPhilipp Reisner 38699920dc5SAndreas Gruenbacher if (unlikely(err)) 387d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); 38899920dc5SAndreas Gruenbacher return err; 389b411b363SPhilipp Reisner } 390b411b363SPhilipp Reisner 391b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 392b411b363SPhilipp Reisner 39369a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) 394b411b363SPhilipp Reisner { 
39569a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 396db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 397b411b363SPhilipp Reisner 398b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 39980a40e43SLars Ellenberg return -EIO; 400b411b363SPhilipp Reisner 401b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector)) 4020f0601f4SLars Ellenberg goto defer; 4030f0601f4SLars Ellenberg 404b411b363SPhilipp Reisner /* GFP_TRY, because if there is no memory available right now, this may 405b411b363SPhilipp Reisner * be rescheduled for later. It is "only" background resync, after all. */ 40669a22773SAndreas Gruenbacher peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 407a0fb3c47SLars Ellenberg size, true /* has real payload */, GFP_TRY); 408db830c46SAndreas Gruenbacher if (!peer_req) 40980a40e43SLars Ellenberg goto defer; 410b411b363SPhilipp Reisner 411a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_send_csum; 4120500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 413a8cd15baSAndreas Gruenbacher list_add(&peer_req->w.list, &device->read_ee); 4140500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 415b411b363SPhilipp Reisner 416b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 417b30ab791SAndreas Gruenbacher if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0) 41880a40e43SLars Ellenberg return 0; 41945bb912bSLars Ellenberg 42010f6d992SLars Ellenberg /* If it failed because of ENOMEM, retry should help. If it failed 42110f6d992SLars Ellenberg * because bio_add_page failed (probably broken lower level driver), 42210f6d992SLars Ellenberg * retry may or may not help. 42310f6d992SLars Ellenberg * If it does not, you may need to force disconnect. 
*/ 4240500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 425a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 4260500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 42722cc37a9SLars Ellenberg 428b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 42980a40e43SLars Ellenberg defer: 430b30ab791SAndreas Gruenbacher put_ldev(device); 43180a40e43SLars Ellenberg return -EAGAIN; 432b411b363SPhilipp Reisner } 433b411b363SPhilipp Reisner 43499920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel) 435794abb75SPhilipp Reisner { 43684b8c06bSAndreas Gruenbacher struct drbd_device *device = 43784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, resync_work); 43884b8c06bSAndreas Gruenbacher 439b30ab791SAndreas Gruenbacher switch (device->state.conn) { 440794abb75SPhilipp Reisner case C_VERIFY_S: 441d448a2e1SAndreas Gruenbacher make_ov_request(device, cancel); 442794abb75SPhilipp Reisner break; 443794abb75SPhilipp Reisner case C_SYNC_TARGET: 444d448a2e1SAndreas Gruenbacher make_resync_request(device, cancel); 445794abb75SPhilipp Reisner break; 446794abb75SPhilipp Reisner } 447794abb75SPhilipp Reisner 44899920dc5SAndreas Gruenbacher return 0; 449794abb75SPhilipp Reisner } 450794abb75SPhilipp Reisner 451b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data) 452b411b363SPhilipp Reisner { 453b30ab791SAndreas Gruenbacher struct drbd_device *device = (struct drbd_device *) data; 454b411b363SPhilipp Reisner 455b30ab791SAndreas Gruenbacher if (list_empty(&device->resync_work.list)) 45684b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 45784b8c06bSAndreas Gruenbacher &device->resync_work); 458b411b363SPhilipp Reisner } 459b411b363SPhilipp Reisner 460778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value) 461778f271dSPhilipp Reisner { 462778f271dSPhilipp Reisner int i; 463778f271dSPhilipp Reisner 
464778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 465f10f2623SPhilipp Reisner fb->values[i] = value; 466778f271dSPhilipp Reisner } 467778f271dSPhilipp Reisner 468778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value) 469778f271dSPhilipp Reisner { 470778f271dSPhilipp Reisner int ov; 471778f271dSPhilipp Reisner 472778f271dSPhilipp Reisner ov = fb->values[fb->head_index]; 473778f271dSPhilipp Reisner fb->values[fb->head_index++] = value; 474778f271dSPhilipp Reisner 475778f271dSPhilipp Reisner if (fb->head_index >= fb->size) 476778f271dSPhilipp Reisner fb->head_index = 0; 477778f271dSPhilipp Reisner 478778f271dSPhilipp Reisner return ov; 479778f271dSPhilipp Reisner } 480778f271dSPhilipp Reisner 481778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value) 482778f271dSPhilipp Reisner { 483778f271dSPhilipp Reisner int i; 484778f271dSPhilipp Reisner 485778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 486778f271dSPhilipp Reisner fb->values[i] += value; 487778f271dSPhilipp Reisner } 488778f271dSPhilipp Reisner 4899958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size) 4909958c857SPhilipp Reisner { 4919958c857SPhilipp Reisner struct fifo_buffer *fb; 4929958c857SPhilipp Reisner 4938747d30aSLars Ellenberg fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); 4949958c857SPhilipp Reisner if (!fb) 4959958c857SPhilipp Reisner return NULL; 4969958c857SPhilipp Reisner 4979958c857SPhilipp Reisner fb->head_index = 0; 4989958c857SPhilipp Reisner fb->size = fifo_size; 4999958c857SPhilipp Reisner fb->total = 0; 5009958c857SPhilipp Reisner 5019958c857SPhilipp Reisner return fb; 5029958c857SPhilipp Reisner } 5039958c857SPhilipp Reisner 5040e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 505778f271dSPhilipp Reisner { 506daeda1ccSPhilipp Reisner struct disk_conf *dc; 507778f271dSPhilipp Reisner unsigned int want; /* The number of 
sectors we want in the proxy */ 508778f271dSPhilipp Reisner int req_sect; /* Number of sectors to request in this turn */ 509778f271dSPhilipp Reisner int correction; /* Number of sectors more we need in the proxy*/ 510778f271dSPhilipp Reisner int cps; /* correction per invocation of drbd_rs_controller() */ 511778f271dSPhilipp Reisner int steps; /* Number of time steps to plan ahead */ 512778f271dSPhilipp Reisner int curr_corr; 513778f271dSPhilipp Reisner int max_sect; 514813472ceSPhilipp Reisner struct fifo_buffer *plan; 515778f271dSPhilipp Reisner 516b30ab791SAndreas Gruenbacher dc = rcu_dereference(device->ldev->disk_conf); 517b30ab791SAndreas Gruenbacher plan = rcu_dereference(device->rs_plan_s); 518778f271dSPhilipp Reisner 519813472ceSPhilipp Reisner steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ 520778f271dSPhilipp Reisner 521b30ab791SAndreas Gruenbacher if (device->rs_in_flight + sect_in == 0) { /* At start of resync */ 522daeda1ccSPhilipp Reisner want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; 523778f271dSPhilipp Reisner } else { /* normal path */ 524daeda1ccSPhilipp Reisner want = dc->c_fill_target ? 
dc->c_fill_target : 525daeda1ccSPhilipp Reisner sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); 526778f271dSPhilipp Reisner } 527778f271dSPhilipp Reisner 528b30ab791SAndreas Gruenbacher correction = want - device->rs_in_flight - plan->total; 529778f271dSPhilipp Reisner 530778f271dSPhilipp Reisner /* Plan ahead */ 531778f271dSPhilipp Reisner cps = correction / steps; 532813472ceSPhilipp Reisner fifo_add_val(plan, cps); 533813472ceSPhilipp Reisner plan->total += cps * steps; 534778f271dSPhilipp Reisner 535778f271dSPhilipp Reisner /* What we do in this step */ 536813472ceSPhilipp Reisner curr_corr = fifo_push(plan, 0); 537813472ceSPhilipp Reisner plan->total -= curr_corr; 538778f271dSPhilipp Reisner 539778f271dSPhilipp Reisner req_sect = sect_in + curr_corr; 540778f271dSPhilipp Reisner if (req_sect < 0) 541778f271dSPhilipp Reisner req_sect = 0; 542778f271dSPhilipp Reisner 543daeda1ccSPhilipp Reisner max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ; 544778f271dSPhilipp Reisner if (req_sect > max_sect) 545778f271dSPhilipp Reisner req_sect = max_sect; 546778f271dSPhilipp Reisner 547778f271dSPhilipp Reisner /* 548d0180171SAndreas Gruenbacher drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", 549b30ab791SAndreas Gruenbacher sect_in, device->rs_in_flight, want, correction, 550b30ab791SAndreas Gruenbacher steps, cps, device->rs_planed, curr_corr, req_sect); 551778f271dSPhilipp Reisner */ 552778f271dSPhilipp Reisner 553778f271dSPhilipp Reisner return req_sect; 554778f271dSPhilipp Reisner } 555778f271dSPhilipp Reisner 556b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device) 557e65f440dSLars Ellenberg { 5580e49d7b0SLars Ellenberg unsigned int sect_in; /* Number of sectors that came in since the last turn */ 5590e49d7b0SLars Ellenberg int number, mxb; 5600e49d7b0SLars Ellenberg 5610e49d7b0SLars Ellenberg sect_in = atomic_xchg(&device->rs_sect_in, 0); 5620e49d7b0SLars Ellenberg device->rs_in_flight -= 
sect_in; 563813472ceSPhilipp Reisner 564813472ceSPhilipp Reisner rcu_read_lock(); 5650e49d7b0SLars Ellenberg mxb = drbd_get_max_buffers(device) / 2; 566b30ab791SAndreas Gruenbacher if (rcu_dereference(device->rs_plan_s)->size) { 5670e49d7b0SLars Ellenberg number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); 568b30ab791SAndreas Gruenbacher device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; 569e65f440dSLars Ellenberg } else { 570b30ab791SAndreas Gruenbacher device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; 571b30ab791SAndreas Gruenbacher number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); 572e65f440dSLars Ellenberg } 573813472ceSPhilipp Reisner rcu_read_unlock(); 574e65f440dSLars Ellenberg 5750e49d7b0SLars Ellenberg /* Don't have more than "max-buffers"/2 in-flight. 5760e49d7b0SLars Ellenberg * Otherwise we may cause the remote site to stall on drbd_alloc_pages(), 5770e49d7b0SLars Ellenberg * potentially causing a distributed deadlock on congestion during 5780e49d7b0SLars Ellenberg * online-verify or (checksum-based) resync, if max-buffers, 5790e49d7b0SLars Ellenberg * socket buffer sizes and resync rate settings are mis-configured. 
*/ 5800e49d7b0SLars Ellenberg if (mxb - device->rs_in_flight < number) 5810e49d7b0SLars Ellenberg number = mxb - device->rs_in_flight; 5820e49d7b0SLars Ellenberg 583e65f440dSLars Ellenberg return number; 584e65f440dSLars Ellenberg } 585e65f440dSLars Ellenberg 586d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *device, int cancel) 587b411b363SPhilipp Reisner { 588b411b363SPhilipp Reisner unsigned long bit; 589b411b363SPhilipp Reisner sector_t sector; 590b30ab791SAndreas Gruenbacher const sector_t capacity = drbd_get_capacity(device->this_bdev); 5911816a2b4SLars Ellenberg int max_bio_size; 592e65f440dSLars Ellenberg int number, rollback_i, size; 593b411b363SPhilipp Reisner int align, queued, sndbuf; 5940f0601f4SLars Ellenberg int i = 0; 595b411b363SPhilipp Reisner 596b411b363SPhilipp Reisner if (unlikely(cancel)) 59799920dc5SAndreas Gruenbacher return 0; 598b411b363SPhilipp Reisner 599b30ab791SAndreas Gruenbacher if (device->rs_total == 0) { 600af85e8e8SLars Ellenberg /* empty resync? 
*/ 601b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 60299920dc5SAndreas Gruenbacher return 0; 603af85e8e8SLars Ellenberg } 604af85e8e8SLars Ellenberg 605b30ab791SAndreas Gruenbacher if (!get_ldev(device)) { 606b30ab791SAndreas Gruenbacher /* Since we only need to access device->rsync a 607b30ab791SAndreas Gruenbacher get_ldev_if_state(device,D_FAILED) would be sufficient, but 608b411b363SPhilipp Reisner to continue resync with a broken disk makes no sense at 609b411b363SPhilipp Reisner all */ 610d0180171SAndreas Gruenbacher drbd_err(device, "Disk broke down during resync!\n"); 61199920dc5SAndreas Gruenbacher return 0; 612b411b363SPhilipp Reisner } 613b411b363SPhilipp Reisner 614b30ab791SAndreas Gruenbacher max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; 615b30ab791SAndreas Gruenbacher number = drbd_rs_number_requests(device); 6160e49d7b0SLars Ellenberg if (number <= 0) 6170f0601f4SLars Ellenberg goto requeue; 618b411b363SPhilipp Reisner 619b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 620b411b363SPhilipp Reisner /* Stop generating RS requests, when half of the send buffer is filled */ 621a6b32bc3SAndreas Gruenbacher mutex_lock(&first_peer_device(device)->connection->data.mutex); 622a6b32bc3SAndreas Gruenbacher if (first_peer_device(device)->connection->data.socket) { 623a6b32bc3SAndreas Gruenbacher queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued; 624a6b32bc3SAndreas Gruenbacher sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf; 625b411b363SPhilipp Reisner } else { 626b411b363SPhilipp Reisner queued = 1; 627b411b363SPhilipp Reisner sndbuf = 0; 628b411b363SPhilipp Reisner } 629a6b32bc3SAndreas Gruenbacher mutex_unlock(&first_peer_device(device)->connection->data.mutex); 630b411b363SPhilipp Reisner if (queued > sndbuf / 2) 631b411b363SPhilipp Reisner goto requeue; 632b411b363SPhilipp Reisner 633b411b363SPhilipp Reisner next_sector: 634b411b363SPhilipp Reisner size = 
BM_BLOCK_SIZE; 635b30ab791SAndreas Gruenbacher bit = drbd_bm_find_next(device, device->bm_resync_fo); 636b411b363SPhilipp Reisner 6374b0715f0SLars Ellenberg if (bit == DRBD_END_OF_BITMAP) { 638b30ab791SAndreas Gruenbacher device->bm_resync_fo = drbd_bm_bits(device); 639b30ab791SAndreas Gruenbacher put_ldev(device); 64099920dc5SAndreas Gruenbacher return 0; 641b411b363SPhilipp Reisner } 642b411b363SPhilipp Reisner 643b411b363SPhilipp Reisner sector = BM_BIT_TO_SECT(bit); 644b411b363SPhilipp Reisner 645b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector) || 646b30ab791SAndreas Gruenbacher drbd_try_rs_begin_io(device, sector)) { 647b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit; 648b411b363SPhilipp Reisner goto requeue; 649b411b363SPhilipp Reisner } 650b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit + 1; 651b411b363SPhilipp Reisner 652b30ab791SAndreas Gruenbacher if (unlikely(drbd_bm_test_bit(device, bit) == 0)) { 653b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 654b411b363SPhilipp Reisner goto next_sector; 655b411b363SPhilipp Reisner } 656b411b363SPhilipp Reisner 6571816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE 658b411b363SPhilipp Reisner /* try to find some adjacent bits. 659b411b363SPhilipp Reisner * we stop if we have already the maximum req size. 660b411b363SPhilipp Reisner * 661b411b363SPhilipp Reisner * Additionally always align bigger requests, in order to 662b411b363SPhilipp Reisner * be prepared for all stripe sizes of software RAIDs. 
663b411b363SPhilipp Reisner */ 664b411b363SPhilipp Reisner align = 1; 665d207450cSPhilipp Reisner rollback_i = i; 6666377b923SLars Ellenberg while (i < number) { 6671816a2b4SLars Ellenberg if (size + BM_BLOCK_SIZE > max_bio_size) 668b411b363SPhilipp Reisner break; 669b411b363SPhilipp Reisner 670b411b363SPhilipp Reisner /* Be always aligned */ 671b411b363SPhilipp Reisner if (sector & ((1<<(align+3))-1)) 672b411b363SPhilipp Reisner break; 673b411b363SPhilipp Reisner 674b411b363SPhilipp Reisner /* do not cross extent boundaries */ 675b411b363SPhilipp Reisner if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) 676b411b363SPhilipp Reisner break; 677b411b363SPhilipp Reisner /* now, is it actually dirty, after all? 678b411b363SPhilipp Reisner * caution, drbd_bm_test_bit is tri-state for some 679b411b363SPhilipp Reisner * obscure reason; ( b == 0 ) would get the out-of-band 680b411b363SPhilipp Reisner * only accidentally right because of the "oddly sized" 681b411b363SPhilipp Reisner * adjustment below */ 682b30ab791SAndreas Gruenbacher if (drbd_bm_test_bit(device, bit+1) != 1) 683b411b363SPhilipp Reisner break; 684b411b363SPhilipp Reisner bit++; 685b411b363SPhilipp Reisner size += BM_BLOCK_SIZE; 686b411b363SPhilipp Reisner if ((BM_BLOCK_SIZE << align) <= size) 687b411b363SPhilipp Reisner align++; 688b411b363SPhilipp Reisner i++; 689b411b363SPhilipp Reisner } 690b411b363SPhilipp Reisner /* if we merged some, 691b411b363SPhilipp Reisner * reset the offset to start the next drbd_bm_find_next from */ 692b411b363SPhilipp Reisner if (size > BM_BLOCK_SIZE) 693b30ab791SAndreas Gruenbacher device->bm_resync_fo = bit + 1; 694b411b363SPhilipp Reisner #endif 695b411b363SPhilipp Reisner 696b411b363SPhilipp Reisner /* adjust very last sectors, in case we are oddly sized */ 697b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 698b411b363SPhilipp Reisner size = (capacity-sector)<<9; 699a6b32bc3SAndreas Gruenbacher if (first_peer_device(device)->connection->agreed_pro_version >= 89 
&& 700a6b32bc3SAndreas Gruenbacher first_peer_device(device)->connection->csums_tfm) { 70169a22773SAndreas Gruenbacher switch (read_for_csum(first_peer_device(device), sector, size)) { 70280a40e43SLars Ellenberg case -EIO: /* Disk failure */ 703b30ab791SAndreas Gruenbacher put_ldev(device); 70499920dc5SAndreas Gruenbacher return -EIO; 70580a40e43SLars Ellenberg case -EAGAIN: /* allocation failed, or ldev busy */ 706b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 707b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 708d207450cSPhilipp Reisner i = rollback_i; 709b411b363SPhilipp Reisner goto requeue; 71080a40e43SLars Ellenberg case 0: 71180a40e43SLars Ellenberg /* everything ok */ 71280a40e43SLars Ellenberg break; 71380a40e43SLars Ellenberg default: 71480a40e43SLars Ellenberg BUG(); 715b411b363SPhilipp Reisner } 716b411b363SPhilipp Reisner } else { 71799920dc5SAndreas Gruenbacher int err; 71899920dc5SAndreas Gruenbacher 719b30ab791SAndreas Gruenbacher inc_rs_pending(device); 72069a22773SAndreas Gruenbacher err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST, 72199920dc5SAndreas Gruenbacher sector, size, ID_SYNCER); 72299920dc5SAndreas Gruenbacher if (err) { 723d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); 724b30ab791SAndreas Gruenbacher dec_rs_pending(device); 725b30ab791SAndreas Gruenbacher put_ldev(device); 72699920dc5SAndreas Gruenbacher return err; 727b411b363SPhilipp Reisner } 728b411b363SPhilipp Reisner } 729b411b363SPhilipp Reisner } 730b411b363SPhilipp Reisner 731b30ab791SAndreas Gruenbacher if (device->bm_resync_fo >= drbd_bm_bits(device)) { 732b411b363SPhilipp Reisner /* last syncer _request_ was sent, 733b411b363SPhilipp Reisner * but the P_RS_DATA_REPLY not yet received. 
sync will end (and 734b411b363SPhilipp Reisner * next sync group will resume), as soon as we receive the last 735b411b363SPhilipp Reisner * resync data block, and the last bit is cleared. 736b411b363SPhilipp Reisner * until then resync "work" is "inactive" ... 737b411b363SPhilipp Reisner */ 738b30ab791SAndreas Gruenbacher put_ldev(device); 73999920dc5SAndreas Gruenbacher return 0; 740b411b363SPhilipp Reisner } 741b411b363SPhilipp Reisner 742b411b363SPhilipp Reisner requeue: 743b30ab791SAndreas Gruenbacher device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 744b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 745b30ab791SAndreas Gruenbacher put_ldev(device); 74699920dc5SAndreas Gruenbacher return 0; 747b411b363SPhilipp Reisner } 748b411b363SPhilipp Reisner 749d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel) 750b411b363SPhilipp Reisner { 751b411b363SPhilipp Reisner int number, i, size; 752b411b363SPhilipp Reisner sector_t sector; 753b30ab791SAndreas Gruenbacher const sector_t capacity = drbd_get_capacity(device->this_bdev); 75458ffa580SLars Ellenberg bool stop_sector_reached = false; 755b411b363SPhilipp Reisner 756b411b363SPhilipp Reisner if (unlikely(cancel)) 757b411b363SPhilipp Reisner return 1; 758b411b363SPhilipp Reisner 759b30ab791SAndreas Gruenbacher number = drbd_rs_number_requests(device); 760b411b363SPhilipp Reisner 761b30ab791SAndreas Gruenbacher sector = device->ov_position; 762b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 76358ffa580SLars Ellenberg if (sector >= capacity) 764b411b363SPhilipp Reisner return 1; 76558ffa580SLars Ellenberg 76658ffa580SLars Ellenberg /* We check for "finished" only in the reply path: 76758ffa580SLars Ellenberg * w_e_end_ov_reply(). 76858ffa580SLars Ellenberg * We need to send at least one request out. 
*/ 76958ffa580SLars Ellenberg stop_sector_reached = i > 0 770b30ab791SAndreas Gruenbacher && verify_can_do_stop_sector(device) 771b30ab791SAndreas Gruenbacher && sector >= device->ov_stop_sector; 77258ffa580SLars Ellenberg if (stop_sector_reached) 77358ffa580SLars Ellenberg break; 774b411b363SPhilipp Reisner 775b411b363SPhilipp Reisner size = BM_BLOCK_SIZE; 776b411b363SPhilipp Reisner 777b30ab791SAndreas Gruenbacher if (drbd_rs_should_slow_down(device, sector) || 778b30ab791SAndreas Gruenbacher drbd_try_rs_begin_io(device, sector)) { 779b30ab791SAndreas Gruenbacher device->ov_position = sector; 780b411b363SPhilipp Reisner goto requeue; 781b411b363SPhilipp Reisner } 782b411b363SPhilipp Reisner 783b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) 784b411b363SPhilipp Reisner size = (capacity-sector)<<9; 785b411b363SPhilipp Reisner 786b30ab791SAndreas Gruenbacher inc_rs_pending(device); 78769a22773SAndreas Gruenbacher if (drbd_send_ov_request(first_peer_device(device), sector, size)) { 788b30ab791SAndreas Gruenbacher dec_rs_pending(device); 789b411b363SPhilipp Reisner return 0; 790b411b363SPhilipp Reisner } 791b411b363SPhilipp Reisner sector += BM_SECT_PER_BIT; 792b411b363SPhilipp Reisner } 793b30ab791SAndreas Gruenbacher device->ov_position = sector; 794b411b363SPhilipp Reisner 795b411b363SPhilipp Reisner requeue: 796b30ab791SAndreas Gruenbacher device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 79758ffa580SLars Ellenberg if (i == 0 || !stop_sector_reached) 798b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); 799b411b363SPhilipp Reisner return 1; 800b411b363SPhilipp Reisner } 801b411b363SPhilipp Reisner 80299920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel) 803b411b363SPhilipp Reisner { 80484b8c06bSAndreas Gruenbacher struct drbd_device_work *dw = 80584b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device_work, w); 80684b8c06bSAndreas Gruenbacher struct drbd_device *device = dw->device; 
80784b8c06bSAndreas Gruenbacher kfree(dw); 808b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 809b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 810b411b363SPhilipp Reisner 81199920dc5SAndreas Gruenbacher return 0; 812b411b363SPhilipp Reisner } 813b411b363SPhilipp Reisner 81499920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel) 815b411b363SPhilipp Reisner { 81684b8c06bSAndreas Gruenbacher struct drbd_device_work *dw = 81784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device_work, w); 81884b8c06bSAndreas Gruenbacher struct drbd_device *device = dw->device; 81984b8c06bSAndreas Gruenbacher kfree(dw); 820b411b363SPhilipp Reisner 821b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 822b411b363SPhilipp Reisner 82399920dc5SAndreas Gruenbacher return 0; 824b411b363SPhilipp Reisner } 825b411b363SPhilipp Reisner 826b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device) 827af85e8e8SLars Ellenberg { 828a6b32bc3SAndreas Gruenbacher struct drbd_connection *connection = first_peer_device(device)->connection; 8292a67d8b9SPhilipp Reisner 830bde89a9eSAndreas Gruenbacher clear_bit(GOT_PING_ACK, &connection->flags); 831bde89a9eSAndreas Gruenbacher request_ping(connection); 832bde89a9eSAndreas Gruenbacher wait_event(connection->ping_wait, 833bde89a9eSAndreas Gruenbacher test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); 834af85e8e8SLars Ellenberg } 835af85e8e8SLars Ellenberg 836b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device) 837b411b363SPhilipp Reisner { 838b411b363SPhilipp Reisner unsigned long db, dt, dbdt; 839b411b363SPhilipp Reisner unsigned long n_oos; 840b411b363SPhilipp Reisner union drbd_state os, ns; 84184b8c06bSAndreas Gruenbacher struct drbd_device_work *dw; 842b411b363SPhilipp Reisner char *khelper_cmd = NULL; 84326525618SLars Ellenberg int verify_done = 0; 844b411b363SPhilipp Reisner 845b411b363SPhilipp 
Reisner /* Remove all elements from the resync LRU. Since future actions 846b411b363SPhilipp Reisner * might set bits in the (main) bitmap, then the entries in the 847b411b363SPhilipp Reisner * resync LRU would be wrong. */ 848b30ab791SAndreas Gruenbacher if (drbd_rs_del_all(device)) { 849b411b363SPhilipp Reisner /* In case this is not possible now, most probably because 850b411b363SPhilipp Reisner * there are P_RS_DATA_REPLY Packets lingering on the worker's 851b411b363SPhilipp Reisner * queue (or even the read operations for those packets 852b411b363SPhilipp Reisner * is not finished by now). Retry in 100ms. */ 853b411b363SPhilipp Reisner 85420ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ / 10); 85584b8c06bSAndreas Gruenbacher dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC); 85684b8c06bSAndreas Gruenbacher if (dw) { 85784b8c06bSAndreas Gruenbacher dw->w.cb = w_resync_finished; 85884b8c06bSAndreas Gruenbacher dw->device = device; 85984b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 86084b8c06bSAndreas Gruenbacher &dw->w); 861b411b363SPhilipp Reisner return 1; 862b411b363SPhilipp Reisner } 86384b8c06bSAndreas Gruenbacher drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); 864b411b363SPhilipp Reisner } 865b411b363SPhilipp Reisner 866b30ab791SAndreas Gruenbacher dt = (jiffies - device->rs_start - device->rs_paused) / HZ; 867b411b363SPhilipp Reisner if (dt <= 0) 868b411b363SPhilipp Reisner dt = 1; 86958ffa580SLars Ellenberg 870b30ab791SAndreas Gruenbacher db = device->rs_total; 87158ffa580SLars Ellenberg /* adjust for verify start and stop sectors, respective reached position */ 872b30ab791SAndreas Gruenbacher if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 873b30ab791SAndreas Gruenbacher db -= device->ov_left; 87458ffa580SLars Ellenberg 875b411b363SPhilipp Reisner dbdt = Bit2KB(db/dt); 876b30ab791SAndreas Gruenbacher device->rs_paused /= HZ; 
877b411b363SPhilipp Reisner 878b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 879b411b363SPhilipp Reisner goto out; 880b411b363SPhilipp Reisner 881b30ab791SAndreas Gruenbacher ping_peer(device); 882af85e8e8SLars Ellenberg 8830500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 884b30ab791SAndreas Gruenbacher os = drbd_read_state(device); 885b411b363SPhilipp Reisner 88626525618SLars Ellenberg verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); 88726525618SLars Ellenberg 888b411b363SPhilipp Reisner /* This protects us against multiple calls (that can happen in the presence 889b411b363SPhilipp Reisner of application IO), and against connectivity loss just before we arrive here. */ 890b411b363SPhilipp Reisner if (os.conn <= C_CONNECTED) 891b411b363SPhilipp Reisner goto out_unlock; 892b411b363SPhilipp Reisner 893b411b363SPhilipp Reisner ns = os; 894b411b363SPhilipp Reisner ns.conn = C_CONNECTED; 895b411b363SPhilipp Reisner 896d0180171SAndreas Gruenbacher drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 89726525618SLars Ellenberg verify_done ? 
"Online verify" : "Resync", 898b30ab791SAndreas Gruenbacher dt + device->rs_paused, device->rs_paused, dbdt); 899b411b363SPhilipp Reisner 900b30ab791SAndreas Gruenbacher n_oos = drbd_bm_total_weight(device); 901b411b363SPhilipp Reisner 902b411b363SPhilipp Reisner if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { 903b411b363SPhilipp Reisner if (n_oos) { 904d0180171SAndreas Gruenbacher drbd_alert(device, "Online verify found %lu %dk block out of sync!\n", 905b411b363SPhilipp Reisner n_oos, Bit2KB(1)); 906b411b363SPhilipp Reisner khelper_cmd = "out-of-sync"; 907b411b363SPhilipp Reisner } 908b411b363SPhilipp Reisner } else { 9090b0ba1efSAndreas Gruenbacher D_ASSERT(device, (n_oos - device->rs_failed) == 0); 910b411b363SPhilipp Reisner 911b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) 912b411b363SPhilipp Reisner khelper_cmd = "after-resync-target"; 913b411b363SPhilipp Reisner 914a6b32bc3SAndreas Gruenbacher if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { 915b30ab791SAndreas Gruenbacher const unsigned long s = device->rs_same_csum; 916b30ab791SAndreas Gruenbacher const unsigned long t = device->rs_total; 917b411b363SPhilipp Reisner const int ratio = 918b411b363SPhilipp Reisner (t == 0) ? 0 : 919b411b363SPhilipp Reisner (t < 100000) ? 
((s*100)/t) : (s/(t/100)); 920d0180171SAndreas Gruenbacher drbd_info(device, "%u %% had equal checksums, eliminated: %luK; " 921b411b363SPhilipp Reisner "transferred %luK total %luK\n", 922b411b363SPhilipp Reisner ratio, 923b30ab791SAndreas Gruenbacher Bit2KB(device->rs_same_csum), 924b30ab791SAndreas Gruenbacher Bit2KB(device->rs_total - device->rs_same_csum), 925b30ab791SAndreas Gruenbacher Bit2KB(device->rs_total)); 926b411b363SPhilipp Reisner } 927b411b363SPhilipp Reisner } 928b411b363SPhilipp Reisner 929b30ab791SAndreas Gruenbacher if (device->rs_failed) { 930d0180171SAndreas Gruenbacher drbd_info(device, " %lu failed blocks\n", device->rs_failed); 931b411b363SPhilipp Reisner 932b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 933b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 934b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 935b411b363SPhilipp Reisner } else { 936b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 937b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 938b411b363SPhilipp Reisner } 939b411b363SPhilipp Reisner } else { 940b411b363SPhilipp Reisner ns.disk = D_UP_TO_DATE; 941b411b363SPhilipp Reisner ns.pdsk = D_UP_TO_DATE; 942b411b363SPhilipp Reisner 943b411b363SPhilipp Reisner if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { 944b30ab791SAndreas Gruenbacher if (device->p_uuid) { 945b411b363SPhilipp Reisner int i; 946b411b363SPhilipp Reisner for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) 947b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, i, device->p_uuid[i]); 948b30ab791SAndreas Gruenbacher drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]); 949b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]); 950b411b363SPhilipp Reisner } else { 951d0180171SAndreas Gruenbacher drbd_err(device, "device->p_uuid is NULL! 
BUG\n"); 952b411b363SPhilipp Reisner } 953b411b363SPhilipp Reisner } 954b411b363SPhilipp Reisner 95562b0da3aSLars Ellenberg if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { 95662b0da3aSLars Ellenberg /* for verify runs, we don't update uuids here, 95762b0da3aSLars Ellenberg * so there would be nothing to report. */ 958b30ab791SAndreas Gruenbacher drbd_uuid_set_bm(device, 0UL); 959b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "updated UUIDs"); 960b30ab791SAndreas Gruenbacher if (device->p_uuid) { 961b411b363SPhilipp Reisner /* Now the two UUID sets are equal, update what we 962b411b363SPhilipp Reisner * know of the peer. */ 963b411b363SPhilipp Reisner int i; 964b411b363SPhilipp Reisner for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) 965b30ab791SAndreas Gruenbacher device->p_uuid[i] = device->ldev->md.uuid[i]; 966b411b363SPhilipp Reisner } 967b411b363SPhilipp Reisner } 96862b0da3aSLars Ellenberg } 969b411b363SPhilipp Reisner 970b30ab791SAndreas Gruenbacher _drbd_set_state(device, ns, CS_VERBOSE, NULL); 971b411b363SPhilipp Reisner out_unlock: 9720500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 973b30ab791SAndreas Gruenbacher put_ldev(device); 974b411b363SPhilipp Reisner out: 975b30ab791SAndreas Gruenbacher device->rs_total = 0; 976b30ab791SAndreas Gruenbacher device->rs_failed = 0; 977b30ab791SAndreas Gruenbacher device->rs_paused = 0; 97858ffa580SLars Ellenberg 97958ffa580SLars Ellenberg /* reset start sector, if we reached end of device */ 980b30ab791SAndreas Gruenbacher if (verify_done && device->ov_left == 0) 981b30ab791SAndreas Gruenbacher device->ov_start_sector = 0; 982b411b363SPhilipp Reisner 983b30ab791SAndreas Gruenbacher drbd_md_sync(device); 98413d42685SLars Ellenberg 985b411b363SPhilipp Reisner if (khelper_cmd) 986b30ab791SAndreas Gruenbacher drbd_khelper(device, khelper_cmd); 987b411b363SPhilipp Reisner 988b411b363SPhilipp Reisner return 1; 989b411b363SPhilipp Reisner } 990b411b363SPhilipp Reisner 
991b411b363SPhilipp Reisner /* helper */ 992b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) 993b411b363SPhilipp Reisner { 994045417f7SAndreas Gruenbacher if (drbd_peer_req_has_active_page(peer_req)) { 995b411b363SPhilipp Reisner /* This might happen if sendpage() has not finished */ 996db830c46SAndreas Gruenbacher int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; 997b30ab791SAndreas Gruenbacher atomic_add(i, &device->pp_in_use_by_net); 998b30ab791SAndreas Gruenbacher atomic_sub(i, &device->pp_in_use); 9990500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1000a8cd15baSAndreas Gruenbacher list_add_tail(&peer_req->w.list, &device->net_ee); 10010500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1002435f0740SLars Ellenberg wake_up(&drbd_pp_wait); 1003b411b363SPhilipp Reisner } else 1004b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1005b411b363SPhilipp Reisner } 1006b411b363SPhilipp Reisner 1007b411b363SPhilipp Reisner /** 1008b411b363SPhilipp Reisner * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST 1009b30ab791SAndreas Gruenbacher * @device: DRBD device. 1010b411b363SPhilipp Reisner * @w: work object. 
1011b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1012b411b363SPhilipp Reisner */ 101399920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel) 1014b411b363SPhilipp Reisner { 1015a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10166780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 10176780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 101899920dc5SAndreas Gruenbacher int err; 1019b411b363SPhilipp Reisner 1020b411b363SPhilipp Reisner if (unlikely(cancel)) { 1021b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1022b30ab791SAndreas Gruenbacher dec_unacked(device); 102399920dc5SAndreas Gruenbacher return 0; 1024b411b363SPhilipp Reisner } 1025b411b363SPhilipp Reisner 1026db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 10276780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); 1028b411b363SPhilipp Reisner } else { 1029b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1030d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. 
sector=%llus.\n", 1031db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 1032b411b363SPhilipp Reisner 10336780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); 1034b411b363SPhilipp Reisner } 1035b411b363SPhilipp Reisner 1036b30ab791SAndreas Gruenbacher dec_unacked(device); 1037b411b363SPhilipp Reisner 1038b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1039b411b363SPhilipp Reisner 104099920dc5SAndreas Gruenbacher if (unlikely(err)) 1041d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block() failed\n"); 104299920dc5SAndreas Gruenbacher return err; 1043b411b363SPhilipp Reisner } 1044b411b363SPhilipp Reisner 1045b411b363SPhilipp Reisner /** 1046a209b4aeSAndreas Gruenbacher * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST 1047b411b363SPhilipp Reisner * @w: work object. 1048b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1049b411b363SPhilipp Reisner */ 105099920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel) 1051b411b363SPhilipp Reisner { 1052a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10536780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 10546780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 105599920dc5SAndreas Gruenbacher int err; 1056b411b363SPhilipp Reisner 1057b411b363SPhilipp Reisner if (unlikely(cancel)) { 1058b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1059b30ab791SAndreas Gruenbacher dec_unacked(device); 106099920dc5SAndreas Gruenbacher return 0; 1061b411b363SPhilipp Reisner } 1062b411b363SPhilipp Reisner 1063b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_FAILED)) { 1064b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1065b30ab791SAndreas Gruenbacher 
put_ldev(device); 1066b411b363SPhilipp Reisner } 1067b411b363SPhilipp Reisner 1068b30ab791SAndreas Gruenbacher if (device->state.conn == C_AHEAD) { 10696780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); 1070db830c46SAndreas Gruenbacher } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1071b30ab791SAndreas Gruenbacher if (likely(device->state.pdsk >= D_INCONSISTENT)) { 1072b30ab791SAndreas Gruenbacher inc_rs_pending(device); 10736780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 1074b411b363SPhilipp Reisner } else { 1075b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1076d0180171SAndreas Gruenbacher drbd_err(device, "Not sending RSDataReply, " 1077b411b363SPhilipp Reisner "partner DISKLESS!\n"); 107899920dc5SAndreas Gruenbacher err = 0; 1079b411b363SPhilipp Reisner } 1080b411b363SPhilipp Reisner } else { 1081b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1082d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegRSDReply. 
sector %llus.\n", 1083db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 1084b411b363SPhilipp Reisner 10856780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 1086b411b363SPhilipp Reisner 1087b411b363SPhilipp Reisner /* update resync data with failure */ 1088b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); 1089b411b363SPhilipp Reisner } 1090b411b363SPhilipp Reisner 1091b30ab791SAndreas Gruenbacher dec_unacked(device); 1092b411b363SPhilipp Reisner 1093b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1094b411b363SPhilipp Reisner 109599920dc5SAndreas Gruenbacher if (unlikely(err)) 1096d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block() failed\n"); 109799920dc5SAndreas Gruenbacher return err; 1098b411b363SPhilipp Reisner } 1099b411b363SPhilipp Reisner 110099920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) 1101b411b363SPhilipp Reisner { 1102a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11036780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 11046780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1105b411b363SPhilipp Reisner struct digest_info *di; 1106b411b363SPhilipp Reisner int digest_size; 1107b411b363SPhilipp Reisner void *digest = NULL; 110899920dc5SAndreas Gruenbacher int err, eq = 0; 1109b411b363SPhilipp Reisner 1110b411b363SPhilipp Reisner if (unlikely(cancel)) { 1111b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1112b30ab791SAndreas Gruenbacher dec_unacked(device); 111399920dc5SAndreas Gruenbacher return 0; 1114b411b363SPhilipp Reisner } 1115b411b363SPhilipp Reisner 1116b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1117b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1118b30ab791SAndreas Gruenbacher 
put_ldev(device); 11191d53f09eSLars Ellenberg } 1120b411b363SPhilipp Reisner 1121db830c46SAndreas Gruenbacher di = peer_req->digest; 1122b411b363SPhilipp Reisner 1123db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1124b411b363SPhilipp Reisner /* quick hack to try to avoid a race against reconfiguration. 1125b411b363SPhilipp Reisner * a real fix would be much more involved, 1126b411b363SPhilipp Reisner * introducing more locking mechanisms */ 11276780139cSAndreas Gruenbacher if (peer_device->connection->csums_tfm) { 11286780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); 11290b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1130b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1131b411b363SPhilipp Reisner } 1132b411b363SPhilipp Reisner if (digest) { 11336780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 1134b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1135b411b363SPhilipp Reisner kfree(digest); 1136b411b363SPhilipp Reisner } 1137b411b363SPhilipp Reisner 1138b411b363SPhilipp Reisner if (eq) { 1139b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); 1140676396d5SLars Ellenberg /* rs_same_csums unit is BM_BLOCK_SIZE */ 1141b30ab791SAndreas Gruenbacher device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; 11426780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); 1143b411b363SPhilipp Reisner } else { 1144b30ab791SAndreas Gruenbacher inc_rs_pending(device); 1145db830c46SAndreas Gruenbacher peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! 
*/ 1146db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ 1147204bba99SPhilipp Reisner kfree(di); 11486780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 1149b411b363SPhilipp Reisner } 1150b411b363SPhilipp Reisner } else { 11516780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 1152b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1153d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); 1154b411b363SPhilipp Reisner } 1155b411b363SPhilipp Reisner 1156b30ab791SAndreas Gruenbacher dec_unacked(device); 1157b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1158b411b363SPhilipp Reisner 115999920dc5SAndreas Gruenbacher if (unlikely(err)) 1160d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block/ack() failed\n"); 116199920dc5SAndreas Gruenbacher return err; 1162b411b363SPhilipp Reisner } 1163b411b363SPhilipp Reisner 116499920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel) 1165b411b363SPhilipp Reisner { 1166a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 11676780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 11686780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1169db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 1170db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 1171b411b363SPhilipp Reisner int digest_size; 1172b411b363SPhilipp Reisner void *digest; 117399920dc5SAndreas Gruenbacher int err = 0; 1174b411b363SPhilipp Reisner 1175b411b363SPhilipp Reisner if (unlikely(cancel)) 1176b411b363SPhilipp Reisner goto out; 1177b411b363SPhilipp Reisner 11786780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); 
1179b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 11808f21420eSPhilipp Reisner if (!digest) { 118199920dc5SAndreas Gruenbacher err = 1; /* terminate the connection in case the allocation failed */ 11828f21420eSPhilipp Reisner goto out; 11838f21420eSPhilipp Reisner } 11848f21420eSPhilipp Reisner 1185db830c46SAndreas Gruenbacher if (likely(!(peer_req->flags & EE_WAS_ERROR))) 11866780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 11878f21420eSPhilipp Reisner else 11888f21420eSPhilipp Reisner memset(digest, 0, digest_size); 11898f21420eSPhilipp Reisner 119053ea4331SLars Ellenberg /* Free e and pages before send. 119153ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 119253ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 119353ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1194c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ 1195b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1196db830c46SAndreas Gruenbacher peer_req = NULL; 1197b30ab791SAndreas Gruenbacher inc_rs_pending(device); 11986780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); 119999920dc5SAndreas Gruenbacher if (err) 1200b30ab791SAndreas Gruenbacher dec_rs_pending(device); 1201b411b363SPhilipp Reisner kfree(digest); 1202b411b363SPhilipp Reisner 1203b411b363SPhilipp Reisner out: 1204db830c46SAndreas Gruenbacher if (peer_req) 1205b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1206b30ab791SAndreas Gruenbacher dec_unacked(device); 120799920dc5SAndreas Gruenbacher return err; 1208b411b363SPhilipp Reisner } 1209b411b363SPhilipp Reisner 1210b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) 1211b411b363SPhilipp Reisner { 1212b30ab791SAndreas Gruenbacher if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { 1213b30ab791SAndreas Gruenbacher device->ov_last_oos_size += size>>9; 1214b411b363SPhilipp Reisner } else { 1215b30ab791SAndreas Gruenbacher device->ov_last_oos_start = sector; 1216b30ab791SAndreas Gruenbacher device->ov_last_oos_size = size>>9; 1217b411b363SPhilipp Reisner } 1218b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 1219b411b363SPhilipp Reisner } 1220b411b363SPhilipp Reisner 122199920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel) 1222b411b363SPhilipp Reisner { 1223a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 12246780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 12256780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1226b411b363SPhilipp Reisner struct digest_info *di; 1227b411b363SPhilipp Reisner void *digest; 1228db830c46SAndreas Gruenbacher 
sector_t sector = peer_req->i.sector; 1229db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 123053ea4331SLars Ellenberg int digest_size; 123199920dc5SAndreas Gruenbacher int err, eq = 0; 123258ffa580SLars Ellenberg bool stop_sector_reached = false; 1233b411b363SPhilipp Reisner 1234b411b363SPhilipp Reisner if (unlikely(cancel)) { 1235b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1236b30ab791SAndreas Gruenbacher dec_unacked(device); 123799920dc5SAndreas Gruenbacher return 0; 1238b411b363SPhilipp Reisner } 1239b411b363SPhilipp Reisner 1240b411b363SPhilipp Reisner /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 1241b411b363SPhilipp Reisner * the resync lru has been cleaned up already */ 1242b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1243b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1244b30ab791SAndreas Gruenbacher put_ldev(device); 12451d53f09eSLars Ellenberg } 1246b411b363SPhilipp Reisner 1247db830c46SAndreas Gruenbacher di = peer_req->digest; 1248b411b363SPhilipp Reisner 1249db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 12506780139cSAndreas Gruenbacher digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); 1251b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1252b411b363SPhilipp Reisner if (digest) { 12536780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 1254b411b363SPhilipp Reisner 12550b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1256b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1257b411b363SPhilipp Reisner kfree(digest); 1258b411b363SPhilipp Reisner } 1259b411b363SPhilipp Reisner } 1260b411b363SPhilipp Reisner 12619676c760SLars Ellenberg /* Free peer_req and pages before send. 
126253ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 126353ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 126453ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1265c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 1266b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1267b411b363SPhilipp Reisner if (!eq) 1268b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 1269b411b363SPhilipp Reisner else 1270b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1271b411b363SPhilipp Reisner 12726780139cSAndreas Gruenbacher err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, 1273b411b363SPhilipp Reisner eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); 1274b411b363SPhilipp Reisner 1275b30ab791SAndreas Gruenbacher dec_unacked(device); 1276b411b363SPhilipp Reisner 1277b30ab791SAndreas Gruenbacher --device->ov_left; 1278ea5442afSLars Ellenberg 1279ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 1280b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 1281b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 1282ea5442afSLars Ellenberg 1283b30ab791SAndreas Gruenbacher stop_sector_reached = verify_can_do_stop_sector(device) && 1284b30ab791SAndreas Gruenbacher (sector + (size>>9)) >= device->ov_stop_sector; 128558ffa580SLars Ellenberg 1286b30ab791SAndreas Gruenbacher if (device->ov_left == 0 || stop_sector_reached) { 1287b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1288b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1289b411b363SPhilipp Reisner } 1290b411b363SPhilipp Reisner 129199920dc5SAndreas Gruenbacher return err; 1292b411b363SPhilipp Reisner } 1293b411b363SPhilipp Reisner 1294b6dd1a89SLars Ellenberg /* FIXME 1295b6dd1a89SLars Ellenberg * We need to track the number of pending barrier acks, 
1296b6dd1a89SLars Ellenberg * and to be able to wait for them. 1297b6dd1a89SLars Ellenberg * See also comment in drbd_adm_attach before drbd_suspend_io. 1298b6dd1a89SLars Ellenberg */ 1299bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection) 1300b411b363SPhilipp Reisner { 13019f5bdc33SAndreas Gruenbacher struct p_barrier *p; 1302b6dd1a89SLars Ellenberg struct drbd_socket *sock; 1303b411b363SPhilipp Reisner 1304bde89a9eSAndreas Gruenbacher sock = &connection->data; 1305bde89a9eSAndreas Gruenbacher p = conn_prepare_command(connection, sock); 13069f5bdc33SAndreas Gruenbacher if (!p) 13079f5bdc33SAndreas Gruenbacher return -EIO; 1308bde89a9eSAndreas Gruenbacher p->barrier = connection->send.current_epoch_nr; 1309b6dd1a89SLars Ellenberg p->pad = 0; 1310bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 1311b6dd1a89SLars Ellenberg 1312bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); 1313b411b363SPhilipp Reisner } 1314b411b363SPhilipp Reisner 131599920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel) 1316b411b363SPhilipp Reisner { 131784b8c06bSAndreas Gruenbacher struct drbd_device *device = 131884b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, unplug_work); 13199f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 13209f5bdc33SAndreas Gruenbacher 1321b411b363SPhilipp Reisner if (cancel) 132299920dc5SAndreas Gruenbacher return 0; 1323a6b32bc3SAndreas Gruenbacher sock = &first_peer_device(device)->connection->data; 132469a22773SAndreas Gruenbacher if (!drbd_prepare_command(first_peer_device(device), sock)) 13259f5bdc33SAndreas Gruenbacher return -EIO; 132669a22773SAndreas Gruenbacher return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0); 1327b411b363SPhilipp Reisner } 1328b411b363SPhilipp Reisner 1329bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct 
drbd_connection *connection, unsigned int epoch) 13304eb9b3cbSLars Ellenberg { 1331bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) { 1332bde89a9eSAndreas Gruenbacher connection->send.seen_any_write_yet = true; 1333bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 1334bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 13354eb9b3cbSLars Ellenberg } 13364eb9b3cbSLars Ellenberg } 13374eb9b3cbSLars Ellenberg 1338bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) 13394eb9b3cbSLars Ellenberg { 13404eb9b3cbSLars Ellenberg /* re-init if first write on this connection */ 1341bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) 13424eb9b3cbSLars Ellenberg return; 1343bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_nr != epoch) { 1344bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_writes) 1345bde89a9eSAndreas Gruenbacher drbd_send_barrier(connection); 1346bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 13474eb9b3cbSLars Ellenberg } 13484eb9b3cbSLars Ellenberg } 13494eb9b3cbSLars Ellenberg 13508f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel) 135173a01a18SPhilipp Reisner { 135273a01a18SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 135384b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1354a6b32bc3SAndreas Gruenbacher struct drbd_connection *connection = first_peer_device(device)->connection; 135599920dc5SAndreas Gruenbacher int err; 135673a01a18SPhilipp Reisner 135773a01a18SPhilipp Reisner if (unlikely(cancel)) { 13588554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 135999920dc5SAndreas Gruenbacher return 0; 136073a01a18SPhilipp Reisner } 136173a01a18SPhilipp Reisner 1362bde89a9eSAndreas Gruenbacher /* this time, no connection->send.current_epoch_writes++; 1363b6dd1a89SLars 
Ellenberg * If it was sent, it was the closing barrier for the last 1364b6dd1a89SLars Ellenberg * replicated epoch, before we went into AHEAD mode. 1365b6dd1a89SLars Ellenberg * No more barriers will be sent, until we leave AHEAD mode again. */ 1366bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1367b6dd1a89SLars Ellenberg 136869a22773SAndreas Gruenbacher err = drbd_send_out_of_sync(first_peer_device(device), req); 13698554df1cSAndreas Gruenbacher req_mod(req, OOS_HANDED_TO_NETWORK); 137073a01a18SPhilipp Reisner 137199920dc5SAndreas Gruenbacher return err; 137273a01a18SPhilipp Reisner } 137373a01a18SPhilipp Reisner 1374b411b363SPhilipp Reisner /** 1375b411b363SPhilipp Reisner * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 1376b411b363SPhilipp Reisner * @w: work object. 1377b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1378b411b363SPhilipp Reisner */ 137999920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel) 1380b411b363SPhilipp Reisner { 1381b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 138284b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1383a6b32bc3SAndreas Gruenbacher struct drbd_connection *connection = first_peer_device(device)->connection; 138499920dc5SAndreas Gruenbacher int err; 1385b411b363SPhilipp Reisner 1386b411b363SPhilipp Reisner if (unlikely(cancel)) { 13878554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 138899920dc5SAndreas Gruenbacher return 0; 1389b411b363SPhilipp Reisner } 1390b411b363SPhilipp Reisner 1391bde89a9eSAndreas Gruenbacher re_init_if_first_write(connection, req->epoch); 1392bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1393bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes++; 1394b6dd1a89SLars Ellenberg 139569a22773SAndreas Gruenbacher err = drbd_send_dblock(first_peer_device(device), req); 
139699920dc5SAndreas Gruenbacher req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1397b411b363SPhilipp Reisner 139899920dc5SAndreas Gruenbacher return err; 1399b411b363SPhilipp Reisner } 1400b411b363SPhilipp Reisner 1401b411b363SPhilipp Reisner /** 1402b411b363SPhilipp Reisner * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet 1403b411b363SPhilipp Reisner * @w: work object. 1404b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1405b411b363SPhilipp Reisner */ 140699920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel) 1407b411b363SPhilipp Reisner { 1408b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 140984b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1410a6b32bc3SAndreas Gruenbacher struct drbd_connection *connection = first_peer_device(device)->connection; 141199920dc5SAndreas Gruenbacher int err; 1412b411b363SPhilipp Reisner 1413b411b363SPhilipp Reisner if (unlikely(cancel)) { 14148554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 141599920dc5SAndreas Gruenbacher return 0; 1416b411b363SPhilipp Reisner } 1417b411b363SPhilipp Reisner 1418b6dd1a89SLars Ellenberg /* Even read requests may close a write epoch, 1419b6dd1a89SLars Ellenberg * if there was any yet. */ 1420bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1421b6dd1a89SLars Ellenberg 142269a22773SAndreas Gruenbacher err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size, 1423b411b363SPhilipp Reisner (unsigned long)req); 1424b411b363SPhilipp Reisner 142599920dc5SAndreas Gruenbacher req_mod(req, err ? 
SEND_FAILED : HANDED_OVER_TO_NETWORK); 1426b411b363SPhilipp Reisner 142799920dc5SAndreas Gruenbacher return err; 1428b411b363SPhilipp Reisner } 1429b411b363SPhilipp Reisner 143099920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel) 1431265be2d0SPhilipp Reisner { 1432265be2d0SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 143384b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1434265be2d0SPhilipp Reisner 14350778286aSPhilipp Reisner if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 1436b30ab791SAndreas Gruenbacher drbd_al_begin_io(device, &req->i, false); 1437265be2d0SPhilipp Reisner 1438265be2d0SPhilipp Reisner drbd_req_make_private_bio(req, req->master_bio); 1439b30ab791SAndreas Gruenbacher req->private_bio->bi_bdev = device->ldev->backing_bdev; 1440265be2d0SPhilipp Reisner generic_make_request(req->private_bio); 1441265be2d0SPhilipp Reisner 144299920dc5SAndreas Gruenbacher return 0; 1443265be2d0SPhilipp Reisner } 1444265be2d0SPhilipp Reisner 1445b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device) 1446b411b363SPhilipp Reisner { 1447b30ab791SAndreas Gruenbacher struct drbd_device *odev = device; 144895f8efd0SAndreas Gruenbacher int resync_after; 1449b411b363SPhilipp Reisner 1450b411b363SPhilipp Reisner while (1) { 1451a3f8f7dcSLars Ellenberg if (!odev->ldev || odev->state.disk == D_DISKLESS) 1452438c8374SPhilipp Reisner return 1; 1453daeda1ccSPhilipp Reisner rcu_read_lock(); 145495f8efd0SAndreas Gruenbacher resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 1455daeda1ccSPhilipp Reisner rcu_read_unlock(); 145695f8efd0SAndreas Gruenbacher if (resync_after == -1) 1457b411b363SPhilipp Reisner return 1; 1458b30ab791SAndreas Gruenbacher odev = minor_to_device(resync_after); 1459a3f8f7dcSLars Ellenberg if (!odev) 1460841ce241SAndreas Gruenbacher return 1; 1461b411b363SPhilipp Reisner if ((odev->state.conn >= 
C_SYNC_SOURCE && 1462b411b363SPhilipp Reisner odev->state.conn <= C_PAUSED_SYNC_T) || 1463b411b363SPhilipp Reisner odev->state.aftr_isp || odev->state.peer_isp || 1464b411b363SPhilipp Reisner odev->state.user_isp) 1465b411b363SPhilipp Reisner return 0; 1466b411b363SPhilipp Reisner } 1467b411b363SPhilipp Reisner } 1468b411b363SPhilipp Reisner 1469b411b363SPhilipp Reisner /** 1470b411b363SPhilipp Reisner * _drbd_pause_after() - Pause resync on all devices that may not resync now 1471b30ab791SAndreas Gruenbacher * @device: DRBD device. 1472b411b363SPhilipp Reisner * 1473b411b363SPhilipp Reisner * Called from process context only (admin command and after_state_ch). 1474b411b363SPhilipp Reisner */ 1475b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device) 1476b411b363SPhilipp Reisner { 147754761697SAndreas Gruenbacher struct drbd_device *odev; 1478b411b363SPhilipp Reisner int i, rv = 0; 1479b411b363SPhilipp Reisner 1480695d08faSPhilipp Reisner rcu_read_lock(); 148105a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1482b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1483b411b363SPhilipp Reisner continue; 1484b411b363SPhilipp Reisner if (!_drbd_may_sync_now(odev)) 1485b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) 1486b411b363SPhilipp Reisner != SS_NOTHING_TO_DO); 1487b411b363SPhilipp Reisner } 1488695d08faSPhilipp Reisner rcu_read_unlock(); 1489b411b363SPhilipp Reisner 1490b411b363SPhilipp Reisner return rv; 1491b411b363SPhilipp Reisner } 1492b411b363SPhilipp Reisner 1493b411b363SPhilipp Reisner /** 1494b411b363SPhilipp Reisner * _drbd_resume_next() - Resume resync on all devices that may resync now 1495b30ab791SAndreas Gruenbacher * @device: DRBD device. 1496b411b363SPhilipp Reisner * 1497b411b363SPhilipp Reisner * Called from process context only (admin command and worker). 
1498b411b363SPhilipp Reisner */ 1499b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device) 1500b411b363SPhilipp Reisner { 150154761697SAndreas Gruenbacher struct drbd_device *odev; 1502b411b363SPhilipp Reisner int i, rv = 0; 1503b411b363SPhilipp Reisner 1504695d08faSPhilipp Reisner rcu_read_lock(); 150505a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1506b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1507b411b363SPhilipp Reisner continue; 1508b411b363SPhilipp Reisner if (odev->state.aftr_isp) { 1509b411b363SPhilipp Reisner if (_drbd_may_sync_now(odev)) 1510b411b363SPhilipp Reisner rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), 1511b411b363SPhilipp Reisner CS_HARD, NULL) 1512b411b363SPhilipp Reisner != SS_NOTHING_TO_DO) ; 1513b411b363SPhilipp Reisner } 1514b411b363SPhilipp Reisner } 1515695d08faSPhilipp Reisner rcu_read_unlock(); 1516b411b363SPhilipp Reisner return rv; 1517b411b363SPhilipp Reisner } 1518b411b363SPhilipp Reisner 1519b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device) 1520b411b363SPhilipp Reisner { 1521b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1522b30ab791SAndreas Gruenbacher _drbd_resume_next(device); 1523b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1524b411b363SPhilipp Reisner } 1525b411b363SPhilipp Reisner 1526b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device) 1527b411b363SPhilipp Reisner { 1528b411b363SPhilipp Reisner write_lock_irq(&global_state_lock); 1529b30ab791SAndreas Gruenbacher _drbd_pause_after(device); 1530b411b363SPhilipp Reisner write_unlock_irq(&global_state_lock); 1531b411b363SPhilipp Reisner } 1532b411b363SPhilipp Reisner 1533dc97b708SPhilipp Reisner /* caller must hold global_state_lock */ 1534b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) 1535b411b363SPhilipp Reisner { 
153654761697SAndreas Gruenbacher struct drbd_device *odev; 153795f8efd0SAndreas Gruenbacher int resync_after; 1538b411b363SPhilipp Reisner 1539b411b363SPhilipp Reisner if (o_minor == -1) 1540b411b363SPhilipp Reisner return NO_ERROR; 1541a3f8f7dcSLars Ellenberg if (o_minor < -1 || o_minor > MINORMASK) 154295f8efd0SAndreas Gruenbacher return ERR_RESYNC_AFTER; 1543b411b363SPhilipp Reisner 1544b411b363SPhilipp Reisner /* check for loops */ 1545b30ab791SAndreas Gruenbacher odev = minor_to_device(o_minor); 1546b411b363SPhilipp Reisner while (1) { 1547b30ab791SAndreas Gruenbacher if (odev == device) 154895f8efd0SAndreas Gruenbacher return ERR_RESYNC_AFTER_CYCLE; 1549b411b363SPhilipp Reisner 1550a3f8f7dcSLars Ellenberg /* You are free to depend on diskless, non-existing, 1551a3f8f7dcSLars Ellenberg * or not yet/no longer existing minors. 1552a3f8f7dcSLars Ellenberg * We only reject dependency loops. 1553a3f8f7dcSLars Ellenberg * We cannot follow the dependency chain beyond a detached or 1554a3f8f7dcSLars Ellenberg * missing minor. 1555a3f8f7dcSLars Ellenberg */ 1556a3f8f7dcSLars Ellenberg if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS) 1557a3f8f7dcSLars Ellenberg return NO_ERROR; 1558a3f8f7dcSLars Ellenberg 1559daeda1ccSPhilipp Reisner rcu_read_lock(); 156095f8efd0SAndreas Gruenbacher resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 1561daeda1ccSPhilipp Reisner rcu_read_unlock(); 1562b411b363SPhilipp Reisner /* dependency chain ends here, no cycles. 
*/ 156395f8efd0SAndreas Gruenbacher if (resync_after == -1) 1564b411b363SPhilipp Reisner return NO_ERROR; 1565b411b363SPhilipp Reisner 1566b411b363SPhilipp Reisner /* follow the dependency chain */ 1567b30ab791SAndreas Gruenbacher odev = minor_to_device(resync_after); 1568b411b363SPhilipp Reisner } 1569b411b363SPhilipp Reisner } 1570b411b363SPhilipp Reisner 1571dc97b708SPhilipp Reisner /* caller must hold global_state_lock */ 1572b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device) 1573b411b363SPhilipp Reisner { 1574b411b363SPhilipp Reisner int changes; 1575b411b363SPhilipp Reisner 1576b411b363SPhilipp Reisner do { 1577b30ab791SAndreas Gruenbacher changes = _drbd_pause_after(device); 1578b30ab791SAndreas Gruenbacher changes |= _drbd_resume_next(device); 1579b411b363SPhilipp Reisner } while (changes); 1580b411b363SPhilipp Reisner } 1581b411b363SPhilipp Reisner 1582b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device) 15839bd28d3cSLars Ellenberg { 1584813472ceSPhilipp Reisner struct fifo_buffer *plan; 1585813472ceSPhilipp Reisner 1586b30ab791SAndreas Gruenbacher atomic_set(&device->rs_sect_in, 0); 1587b30ab791SAndreas Gruenbacher atomic_set(&device->rs_sect_ev, 0); 1588b30ab791SAndreas Gruenbacher device->rs_in_flight = 0; 1589813472ceSPhilipp Reisner 1590813472ceSPhilipp Reisner /* Updating the RCU protected object in place is necessary since 1591813472ceSPhilipp Reisner this function gets called from atomic context. 
1592813472ceSPhilipp Reisner It is valid since all other updates also lead to an completely 1593813472ceSPhilipp Reisner empty fifo */ 1594813472ceSPhilipp Reisner rcu_read_lock(); 1595b30ab791SAndreas Gruenbacher plan = rcu_dereference(device->rs_plan_s); 1596813472ceSPhilipp Reisner plan->total = 0; 1597813472ceSPhilipp Reisner fifo_set(plan, 0); 1598813472ceSPhilipp Reisner rcu_read_unlock(); 15999bd28d3cSLars Ellenberg } 16009bd28d3cSLars Ellenberg 16011f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data) 16021f04af33SPhilipp Reisner { 1603b30ab791SAndreas Gruenbacher struct drbd_device *device = (struct drbd_device *) data; 16041f04af33SPhilipp Reisner 160584b8c06bSAndreas Gruenbacher drbd_queue_work(&first_peer_device(device)->connection->sender_work, 160684b8c06bSAndreas Gruenbacher &device->start_resync_work); 16071f04af33SPhilipp Reisner } 16081f04af33SPhilipp Reisner 160999920dc5SAndreas Gruenbacher int w_start_resync(struct drbd_work *w, int cancel) 16101f04af33SPhilipp Reisner { 161184b8c06bSAndreas Gruenbacher struct drbd_device *device = 161284b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, start_resync_work); 161300d56944SPhilipp Reisner 1614b30ab791SAndreas Gruenbacher if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { 1615d0180171SAndreas Gruenbacher drbd_warn(device, "w_start_resync later...\n"); 1616b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ/10; 1617b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 161899920dc5SAndreas Gruenbacher return 0; 16191f04af33SPhilipp Reisner } 16201f04af33SPhilipp Reisner 1621b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_SOURCE); 1622b30ab791SAndreas Gruenbacher clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); 162399920dc5SAndreas Gruenbacher return 0; 16241f04af33SPhilipp Reisner } 16251f04af33SPhilipp Reisner 1626b411b363SPhilipp Reisner /** 1627b411b363SPhilipp Reisner * 
drbd_start_resync() - Start the resync process 1628b30ab791SAndreas Gruenbacher * @device: DRBD device. 1629b411b363SPhilipp Reisner * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET 1630b411b363SPhilipp Reisner * 1631b411b363SPhilipp Reisner * This function might bring you directly into one of the 1632b411b363SPhilipp Reisner * C_PAUSED_SYNC_* states. 1633b411b363SPhilipp Reisner */ 1634b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) 1635b411b363SPhilipp Reisner { 1636b411b363SPhilipp Reisner union drbd_state ns; 1637b411b363SPhilipp Reisner int r; 1638b411b363SPhilipp Reisner 1639b30ab791SAndreas Gruenbacher if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { 1640d0180171SAndreas Gruenbacher drbd_err(device, "Resync already running!\n"); 1641b411b363SPhilipp Reisner return; 1642b411b363SPhilipp Reisner } 1643b411b363SPhilipp Reisner 1644b30ab791SAndreas Gruenbacher if (!test_bit(B_RS_H_DONE, &device->flags)) { 1645b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) { 1646b411b363SPhilipp Reisner /* Since application IO was locked out during C_WF_BITMAP_T and 1647b411b363SPhilipp Reisner C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 1648b411b363SPhilipp Reisner we check that we might make the data inconsistent. 
*/ 1649b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-target"); 1650b411b363SPhilipp Reisner r = (r >> 8) & 0xff; 1651b411b363SPhilipp Reisner if (r > 0) { 1652d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-target handler returned %d, " 1653b411b363SPhilipp Reisner "dropping connection.\n", r); 1654a6b32bc3SAndreas Gruenbacher conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD); 1655b411b363SPhilipp Reisner return; 1656b411b363SPhilipp Reisner } 165709b9e797SPhilipp Reisner } else /* C_SYNC_SOURCE */ { 1658b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-source"); 165909b9e797SPhilipp Reisner r = (r >> 8) & 0xff; 166009b9e797SPhilipp Reisner if (r > 0) { 166109b9e797SPhilipp Reisner if (r == 3) { 1662d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 166309b9e797SPhilipp Reisner "ignoring. Old userland tools?", r); 166409b9e797SPhilipp Reisner } else { 1665d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 166609b9e797SPhilipp Reisner "dropping connection.\n", r); 1667a6b32bc3SAndreas Gruenbacher conn_request_state(first_peer_device(device)->connection, 1668a6b32bc3SAndreas Gruenbacher NS(conn, C_DISCONNECTING), CS_HARD); 166909b9e797SPhilipp Reisner return; 167009b9e797SPhilipp Reisner } 167109b9e797SPhilipp Reisner } 1672b411b363SPhilipp Reisner } 1673e64a3294SPhilipp Reisner } 1674b411b363SPhilipp Reisner 1675a6b32bc3SAndreas Gruenbacher if (current == first_peer_device(device)->connection->worker.task) { 1676dad20554SPhilipp Reisner /* The worker should not sleep waiting for state_mutex, 1677e64a3294SPhilipp Reisner that can take long */ 1678b30ab791SAndreas Gruenbacher if (!mutex_trylock(device->state_mutex)) { 1679b30ab791SAndreas Gruenbacher set_bit(B_RS_H_DONE, &device->flags); 1680b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ/5; 
1681b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 1682e64a3294SPhilipp Reisner return; 1683e64a3294SPhilipp Reisner } 1684e64a3294SPhilipp Reisner } else { 1685b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 1686e64a3294SPhilipp Reisner } 1687b30ab791SAndreas Gruenbacher clear_bit(B_RS_H_DONE, &device->flags); 1688b411b363SPhilipp Reisner 1689074f4afeSLars Ellenberg /* req_lock: serialize with drbd_send_and_submit() and others 1690074f4afeSLars Ellenberg * global_state_lock: for stable sync-after dependencies */ 1691074f4afeSLars Ellenberg spin_lock_irq(&device->resource->req_lock); 1692074f4afeSLars Ellenberg write_lock(&global_state_lock); 1693a700471bSPhilipp Reisner /* Did some connection breakage or IO error race with us? */ 1694b30ab791SAndreas Gruenbacher if (device->state.conn < C_CONNECTED 1695b30ab791SAndreas Gruenbacher || !get_ldev_if_state(device, D_NEGOTIATING)) { 1696074f4afeSLars Ellenberg write_unlock(&global_state_lock); 1697074f4afeSLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 1698b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 1699b411b363SPhilipp Reisner return; 1700b411b363SPhilipp Reisner } 1701b411b363SPhilipp Reisner 1702b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1703b411b363SPhilipp Reisner 1704b30ab791SAndreas Gruenbacher ns.aftr_isp = !_drbd_may_sync_now(device); 1705b411b363SPhilipp Reisner 1706b411b363SPhilipp Reisner ns.conn = side; 1707b411b363SPhilipp Reisner 1708b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) 1709b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 1710b411b363SPhilipp Reisner else /* side == C_SYNC_SOURCE */ 1711b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 1712b411b363SPhilipp Reisner 1713b30ab791SAndreas Gruenbacher r = __drbd_set_state(device, ns, CS_VERBOSE, NULL); 1714b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1715b411b363SPhilipp Reisner 1716b411b363SPhilipp Reisner if (ns.conn < C_CONNECTED) 
1717b411b363SPhilipp Reisner r = SS_UNKNOWN_ERROR; 1718b411b363SPhilipp Reisner 1719b411b363SPhilipp Reisner if (r == SS_SUCCESS) { 1720b30ab791SAndreas Gruenbacher unsigned long tw = drbd_bm_total_weight(device); 17211d7734a0SLars Ellenberg unsigned long now = jiffies; 17221d7734a0SLars Ellenberg int i; 17231d7734a0SLars Ellenberg 1724b30ab791SAndreas Gruenbacher device->rs_failed = 0; 1725b30ab791SAndreas Gruenbacher device->rs_paused = 0; 1726b30ab791SAndreas Gruenbacher device->rs_same_csum = 0; 1727b30ab791SAndreas Gruenbacher device->rs_last_events = 0; 1728b30ab791SAndreas Gruenbacher device->rs_last_sect_ev = 0; 1729b30ab791SAndreas Gruenbacher device->rs_total = tw; 1730b30ab791SAndreas Gruenbacher device->rs_start = now; 17311d7734a0SLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 1732b30ab791SAndreas Gruenbacher device->rs_mark_left[i] = tw; 1733b30ab791SAndreas Gruenbacher device->rs_mark_time[i] = now; 17341d7734a0SLars Ellenberg } 1735b30ab791SAndreas Gruenbacher _drbd_pause_after(device); 1736b411b363SPhilipp Reisner } 1737074f4afeSLars Ellenberg write_unlock(&global_state_lock); 1738074f4afeSLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 17395a22db89SLars Ellenberg 17406c922ed5SLars Ellenberg if (r == SS_SUCCESS) { 1741328e0f12SPhilipp Reisner /* reset rs_last_bcast when a resync or verify is started, 1742328e0f12SPhilipp Reisner * to deal with potential jiffies wrap. 
*/ 1743b30ab791SAndreas Gruenbacher device->rs_last_bcast = jiffies - HZ; 1744328e0f12SPhilipp Reisner 1745d0180171SAndreas Gruenbacher drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 17466c922ed5SLars Ellenberg drbd_conn_str(ns.conn), 1747b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), 1748b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total); 17495a22db89SLars Ellenberg if (side == C_SYNC_TARGET) 1750b30ab791SAndreas Gruenbacher device->bm_resync_fo = 0; 17515a22db89SLars Ellenberg 17525a22db89SLars Ellenberg /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 17535a22db89SLars Ellenberg * with w_send_oos, or the sync target will get confused as to 17545a22db89SLars Ellenberg * how much bits to resync. We cannot do that always, because for an 17555a22db89SLars Ellenberg * empty resync and protocol < 95, we need to do it here, as we call 17565a22db89SLars Ellenberg * drbd_resync_finished from here in that case. 17575a22db89SLars Ellenberg * We drbd_gen_and_send_sync_uuid here for protocol < 96, 17585a22db89SLars Ellenberg * and from after_state_ch otherwise. */ 1759a6b32bc3SAndreas Gruenbacher if (side == C_SYNC_SOURCE && 1760a6b32bc3SAndreas Gruenbacher first_peer_device(device)->connection->agreed_pro_version < 96) 176169a22773SAndreas Gruenbacher drbd_gen_and_send_sync_uuid(first_peer_device(device)); 1762b411b363SPhilipp Reisner 1763a6b32bc3SAndreas Gruenbacher if (first_peer_device(device)->connection->agreed_pro_version < 95 && 1764a6b32bc3SAndreas Gruenbacher device->rs_total == 0) { 1765af85e8e8SLars Ellenberg /* This still has a race (about when exactly the peers 1766af85e8e8SLars Ellenberg * detect connection loss) that can lead to a full sync 1767af85e8e8SLars Ellenberg * on next handshake. In 8.3.9 we fixed this with explicit 1768af85e8e8SLars Ellenberg * resync-finished notifications, but the fix 1769af85e8e8SLars Ellenberg * introduces a protocol change. 
Sleeping for some 1770af85e8e8SLars Ellenberg * time longer than the ping interval + timeout on the 1771af85e8e8SLars Ellenberg * SyncSource, to give the SyncTarget the chance to 1772af85e8e8SLars Ellenberg * detect connection loss, then waiting for a ping 1773af85e8e8SLars Ellenberg * response (implicit in drbd_resync_finished) reduces 1774af85e8e8SLars Ellenberg * the race considerably, but does not solve it. */ 177544ed167dSPhilipp Reisner if (side == C_SYNC_SOURCE) { 177644ed167dSPhilipp Reisner struct net_conf *nc; 177744ed167dSPhilipp Reisner int timeo; 177844ed167dSPhilipp Reisner 177944ed167dSPhilipp Reisner rcu_read_lock(); 1780a6b32bc3SAndreas Gruenbacher nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 178144ed167dSPhilipp Reisner timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; 178244ed167dSPhilipp Reisner rcu_read_unlock(); 178344ed167dSPhilipp Reisner schedule_timeout_interruptible(timeo); 178444ed167dSPhilipp Reisner } 1785b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1786b411b363SPhilipp Reisner } 1787b411b363SPhilipp Reisner 1788b30ab791SAndreas Gruenbacher drbd_rs_controller_reset(device); 1789b30ab791SAndreas Gruenbacher /* ns.conn may already be != device->state.conn, 1790b411b363SPhilipp Reisner * we may have been paused in between, or become paused until 1791b411b363SPhilipp Reisner * the timer triggers. 
1792b411b363SPhilipp Reisner * No matter, that is handled in resync_timer_fn() */ 1793b411b363SPhilipp Reisner if (ns.conn == C_SYNC_TARGET) 1794b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies); 1795b411b363SPhilipp Reisner 1796b30ab791SAndreas Gruenbacher drbd_md_sync(device); 1797b411b363SPhilipp Reisner } 1798b30ab791SAndreas Gruenbacher put_ldev(device); 1799b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 1800b411b363SPhilipp Reisner } 1801b411b363SPhilipp Reisner

/*
 * dequeue_work_batch() - grab everything that is queued on @queue.
 *
 * Splices the whole of @queue->q onto @work_list while holding
 * @queue->q_lock with interrupts disabled; the queue is left empty.
 *
 * Returns true if @work_list is non-empty afterwards.
 */
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
	bool have_work;

	spin_lock_irq(&queue->q_lock);
	list_splice_init(&queue->q, work_list);
	spin_unlock_irq(&queue->q_lock);

	have_work = !list_empty(work_list);
	return have_work;
}

/*
 * dequeue_work_item() - grab at most one entry from @queue.
 *
 * If @queue->q is not empty, its first entry is moved onto @work_list,
 * under @queue->q_lock with interrupts disabled.
 *
 * Returns true if @work_list is non-empty afterwards.
 */
static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
{
	struct list_head *queued = &queue->q;

	spin_lock_irq(&queue->q_lock);
	if (!list_empty(queued))
		list_move(queued->next, work_list);
	spin_unlock_irq(&queue->q_lock);

	return !list_empty(work_list);
}

/*
 * Read the tcp_cork setting of @connection under rcu_read_lock().
 * Yields 0 if the connection currently has no net_conf.
 */
static int sender_tcp_cork_setting(struct drbd_connection *connection)
{
	struct net_conf *conf;
	int setting;

	rcu_read_lock();
	conf = rcu_dereference(connection->net_conf);
	setting = conf ? conf->tcp_cork : 0;
	rcu_read_unlock();

	return setting;
}

/*
 * wait_for_work() - fetch the next sender work item, sleeping if necessary.
 * @connection: connection whose sender_work queue is served
 * @work_list:  list the dequeued item is moved onto
 *
 * Returns as soon as one item could be moved onto @work_list, or when a
 * signal is pending for the current task.  While idle, the data socket is
 * uncorked (if tcp_cork is configured) and a pending epoch barrier is sent;
 * before returning, the cork state is re-applied from the current config.
 */
static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
{
	struct drbd_work_queue *sender = &connection->sender_work;
	DEFINE_WAIT(waiter);
	int uncork, cork;

	/* Fast path: something is queued already. */
	if (dequeue_work_item(sender, work_list))
		return;

	/* Nothing to do right now.  The current epoch may still have to be
	 * closed, even though no new requests have been queued yet.
	 *
	 * Give TCP a poke as well, just in case,
	 * and then wait for new work (or a signal). */
	uncork = sender_tcp_cork_setting(connection);
	if (uncork) {
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			drbd_tcp_uncork(connection->data.socket);
		mutex_unlock(&connection->data.mutex);
	}

	while (1) {
		bool done;
		bool epoch_open = false;

		prepare_to_wait(&sender->q_wait, &waiter, TASK_INTERRUPTIBLE);

		spin_lock_irq(&connection->resource->req_lock);
		spin_lock(&sender->q_lock);	/* FIXME get rid of this one? */
		/* Take a single item only:
		 * drbd_queue_work_front() is still used in some places. */
		if (!list_empty(&sender->q))
			list_move(sender->q.next, work_list);
		spin_unlock(&sender->q_lock);	/* FIXME get rid of this one? */

		done = !list_empty(work_list) || signal_pending(current);

		/* No to-be-communicated request and no other work item was
		 * found.  The last epoch may still need closing: the next
		 * incoming request will carry the connection's current
		 * transfer log epoch number.  If that differs from the epoch
		 * of the last request we communicated, it is safe to send
		 * the epoch separating barrier now.
		 */
		if (!done)
			epoch_open = atomic_read(&connection->current_tle_nr) !=
				     connection->send.current_epoch_nr;
		spin_unlock_irq(&connection->resource->req_lock);

		if (done)
			break;

		if (epoch_open)
			maybe_send_barrier(connection,
					   connection->send.current_epoch_nr + 1);
		schedule();
		/* A wakeup does not necessarily mean new work; it may also
		 * signal that the current epoch got closed, in which case
		 * the barrier is sent on the next pass through the loop. */
	}
	finish_wait(&sender->q_wait, &waiter);

	/* The configuration may have been changed while we were waiting. */
	cork = sender_tcp_cork_setting(connection);
	mutex_lock(&connection->data.mutex);
	if (connection->data.socket) {
		if (cork)
			drbd_tcp_cork(connection->data.socket);
		else if (!uncork)
			drbd_tcp_uncork(connection->data.socket);
	}
	mutex_unlock(&connection->data.mutex);
}

/*
 * drbd_worker() - main function of the worker thread.
 * @thi: thread descriptor; work is taken from thi->connection->sender_work
 *
 * While the thread state is RUNNING, work items are fetched one at a time
 * and their callbacks are run.  A callback returning non-zero while the
 * connection state is at least C_WF_REPORT_PARAMS makes the connection go
 * to C_NETWORK_FAILURE.  Once the thread leaves RUNNING, all remaining work
 * is run with 1 as second callback argument, and every peer device of the
 * connection is passed to drbd_device_cleanup().
 *
 * Always returns 0.
 */
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct drbd_peer_device *peer_device;
	struct drbd_work *item;
	LIST_HEAD(pending);
	int vnr;

	for (;;) {
		if (get_t_state(thi) != RUNNING)
			break;

		drbd_thread_current_set_cpu(thi);

		/* As long as drbd_queue_work_front() is in use, only single
		 * work items may be dequeued here, never whole batches. */
		if (list_empty(&pending))
			wait_for_work(connection, &pending);

		if (signal_pending(current)) {
			flush_signals(current);
			if (get_t_state(thi) != RUNNING)
				break;
			drbd_warn(connection, "Worker got an unexpected signal\n");
			continue;
		}

		if (get_t_state(thi) != RUNNING)
			break;

		while (!list_empty(&pending)) {
			item = list_first_entry(&pending, struct drbd_work, list);
			list_del_init(&item->list);
			if (item->cb(item, connection->cstate < C_WF_REPORT_PARAMS) != 0 &&
			    connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	/* No longer RUNNING: run whatever is left, here and on the queue,
	 * until a batch dequeue comes back empty. */
	do {
		while (!list_empty(&pending)) {
			item = list_first_entry(&pending, struct drbd_work, list);
			list_del_init(&item->list);
			item->cb(item, 1);
		}
	} while (dequeue_work_batch(&connection->sender_work, &pending));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
		/* Hold a reference on the device while the RCU read lock
		 * is dropped around the cleanup call. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_device_cleanup(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}