1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_worker.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner 24b411b363SPhilipp Reisner */ 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner #include <linux/drbd.h> 28174cd4b1SIngo Molnar #include <linux/sched/signal.h> 29b411b363SPhilipp Reisner #include <linux/wait.h> 30b411b363SPhilipp Reisner #include <linux/mm.h> 31b411b363SPhilipp Reisner #include <linux/memcontrol.h> 32b411b363SPhilipp Reisner #include <linux/mm_inline.h> 33b411b363SPhilipp Reisner #include <linux/slab.h> 34b411b363SPhilipp Reisner #include <linux/random.h> 35b411b363SPhilipp Reisner #include <linux/string.h> 36b411b363SPhilipp Reisner #include <linux/scatterlist.h> 37b411b363SPhilipp Reisner 38b411b363SPhilipp Reisner #include "drbd_int.h" 39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 40b411b363SPhilipp Reisner #include "drbd_req.h" 41b411b363SPhilipp Reisner 42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int); 43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int); 44b411b363SPhilipp Reisner 45c5a91619SAndreas Gruenbacher /* endio handlers: 46ed15b795SAndreas Gruenbacher * drbd_md_endio (defined here) 47fcefa62eSAndreas Gruenbacher * drbd_request_endio (defined here) 48fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio (defined here) 49ed15b795SAndreas Gruenbacher * drbd_bm_endio (defined in drbd_bitmap.c) 50c5a91619SAndreas Gruenbacher * 51b411b363SPhilipp Reisner * For all these callbacks, note the following: 52b411b363SPhilipp Reisner * The callbacks will be called in irq context by the IDE drivers, 53b411b363SPhilipp Reisner * and in Softirqs/Tasklets/BH context by the SCSI drivers. 
54b411b363SPhilipp Reisner * Try to get the locking right :) 55b411b363SPhilipp Reisner * 56b411b363SPhilipp Reisner */ 57b411b363SPhilipp Reisner 58b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO 59b411b363SPhilipp Reisner * submitted by drbd_md_sync_page_io() 60b411b363SPhilipp Reisner */ 614246a0b6SChristoph Hellwig void drbd_md_endio(struct bio *bio) 62b411b363SPhilipp Reisner { 63b30ab791SAndreas Gruenbacher struct drbd_device *device; 64b411b363SPhilipp Reisner 65e37d2438SLars Ellenberg device = bio->bi_private; 664e4cbee9SChristoph Hellwig device->md_io.error = blk_status_to_errno(bio->bi_status); 67b411b363SPhilipp Reisner 687c752ed3SLars Ellenberg /* special case: drbd_md_read() during drbd_adm_attach() */ 697c752ed3SLars Ellenberg if (device->ldev) 707c752ed3SLars Ellenberg put_ldev(device); 717c752ed3SLars Ellenberg bio_put(bio); 727c752ed3SLars Ellenberg 730cfac5ddSPhilipp Reisner /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 740cfac5ddSPhilipp Reisner * to timeout on the lower level device, and eventually detach from it. 750cfac5ddSPhilipp Reisner * If this io completion runs after that timeout expired, this 760cfac5ddSPhilipp Reisner * drbd_md_put_buffer() may allow us to finally try and re-attach. 770cfac5ddSPhilipp Reisner * During normal operation, this only puts that extra reference 780cfac5ddSPhilipp Reisner * down to 1 again. 790cfac5ddSPhilipp Reisner * Make sure we first drop the reference, and only then signal 800cfac5ddSPhilipp Reisner * completion, or we may (in drbd_al_read_log()) cycle so fast into the 810cfac5ddSPhilipp Reisner * next drbd_md_sync_page_io(), that we trigger the 82b30ab791SAndreas Gruenbacher * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 
830cfac5ddSPhilipp Reisner */ 84b30ab791SAndreas Gruenbacher drbd_md_put_buffer(device); 85e37d2438SLars Ellenberg device->md_io.done = 1; 86b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 87b411b363SPhilipp Reisner } 88b411b363SPhilipp Reisner 89b411b363SPhilipp Reisner /* reads on behalf of the partner, 90b411b363SPhilipp Reisner * "submitted" by the receiver 91b411b363SPhilipp Reisner */ 92a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) 93b411b363SPhilipp Reisner { 94b411b363SPhilipp Reisner unsigned long flags = 0; 956780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 966780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 97b411b363SPhilipp Reisner 980500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 99b30ab791SAndreas Gruenbacher device->read_cnt += peer_req->i.size >> 9; 100a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 101b30ab791SAndreas Gruenbacher if (list_empty(&device->read_ee)) 102b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 103db830c46SAndreas Gruenbacher if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) 104b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_READ_ERROR); 1050500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 106b411b363SPhilipp Reisner 1076780139cSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); 108b30ab791SAndreas Gruenbacher put_ldev(device); 109b411b363SPhilipp Reisner } 110b411b363SPhilipp Reisner 111b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes, 11245bb912bSLars Ellenberg * "submitted" by the receiver, final stage. 
*/ 113a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) 114b411b363SPhilipp Reisner { 115b411b363SPhilipp Reisner unsigned long flags = 0; 1166780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1176780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 118668700b4SPhilipp Reisner struct drbd_connection *connection = peer_device->connection; 119181286adSLars Ellenberg struct drbd_interval i; 120b411b363SPhilipp Reisner int do_wake; 121579b57edSAndreas Gruenbacher u64 block_id; 122b411b363SPhilipp Reisner int do_al_complete_io; 123b411b363SPhilipp Reisner 124db830c46SAndreas Gruenbacher /* after we moved peer_req to done_ee, 125b411b363SPhilipp Reisner * we may no longer access it, 126b411b363SPhilipp Reisner * it may be freed/reused already! 127b411b363SPhilipp Reisner * (as soon as we release the req_lock) */ 128181286adSLars Ellenberg i = peer_req->i; 129db830c46SAndreas Gruenbacher do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 130db830c46SAndreas Gruenbacher block_id = peer_req->block_id; 13121ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 132b411b363SPhilipp Reisner 133e1fbc4caSLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) { 134e1fbc4caSLars Ellenberg /* In protocol != C, we usually do not send write acks. 135e1fbc4caSLars Ellenberg * In case of a write error, send the neg ack anyways. 
*/ 136e1fbc4caSLars Ellenberg if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) 137e1fbc4caSLars Ellenberg inc_unacked(device); 138e1fbc4caSLars Ellenberg drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 139e1fbc4caSLars Ellenberg } 140e1fbc4caSLars Ellenberg 1410500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 142b30ab791SAndreas Gruenbacher device->writ_cnt += peer_req->i.size >> 9; 143a8cd15baSAndreas Gruenbacher list_move_tail(&peer_req->w.list, &device->done_ee); 144b411b363SPhilipp Reisner 145bb3bfe96SAndreas Gruenbacher /* 1465e472264SAndreas Gruenbacher * Do not remove from the write_requests tree here: we did not send the 147bb3bfe96SAndreas Gruenbacher * Ack yet and did not wake possibly waiting conflicting requests. 148bb3bfe96SAndreas Gruenbacher * Removed from the tree from "drbd_process_done_ee" within the 14984b8c06bSAndreas Gruenbacher * appropriate dw.cb (e_end_block/e_end_resync_block) or from 150bb3bfe96SAndreas Gruenbacher * _drbd_clear_done_ee. 151bb3bfe96SAndreas Gruenbacher */ 152b411b363SPhilipp Reisner 153b30ab791SAndreas Gruenbacher do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 154b411b363SPhilipp Reisner 155a0fb3c47SLars Ellenberg /* FIXME do we want to detach for failed REQ_DISCARD? 
156a0fb3c47SLars Ellenberg * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 157a0fb3c47SLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) 158b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 159668700b4SPhilipp Reisner 160668700b4SPhilipp Reisner if (connection->cstate >= C_WF_REPORT_PARAMS) { 161668700b4SPhilipp Reisner kref_get(&device->kref); /* put is in drbd_send_acks_wf() */ 162668700b4SPhilipp Reisner if (!queue_work(connection->ack_sender, &peer_device->send_acks_work)) 163668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 164668700b4SPhilipp Reisner } 1650500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 166b411b363SPhilipp Reisner 167579b57edSAndreas Gruenbacher if (block_id == ID_SYNCER) 168b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, i.sector); 169b411b363SPhilipp Reisner 170b411b363SPhilipp Reisner if (do_wake) 171b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 172b411b363SPhilipp Reisner 173b411b363SPhilipp Reisner if (do_al_complete_io) 174b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &i); 175b411b363SPhilipp Reisner 176b30ab791SAndreas Gruenbacher put_ldev(device); 17745bb912bSLars Ellenberg } 178b411b363SPhilipp Reisner 17945bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes, 18045bb912bSLars Ellenberg * "submitted" by the receiver. 
18145bb912bSLars Ellenberg */ 1824246a0b6SChristoph Hellwig void drbd_peer_request_endio(struct bio *bio) 18345bb912bSLars Ellenberg { 184db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req = bio->bi_private; 185a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_req->peer_device->device; 1867e5fec31SFabian Frederick bool is_write = bio_data_dir(bio) == WRITE; 18745c21793SChristoph Hellwig bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || 18845c21793SChristoph Hellwig bio_op(bio) == REQ_OP_DISCARD; 18945bb912bSLars Ellenberg 1904e4cbee9SChristoph Hellwig if (bio->bi_status && __ratelimit(&drbd_ratelimit_state)) 191d0180171SAndreas Gruenbacher drbd_warn(device, "%s: error=%d s=%llus\n", 192a0fb3c47SLars Ellenberg is_write ? (is_discard ? "discard" : "write") 1934e4cbee9SChristoph Hellwig : "read", bio->bi_status, 194db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 19545bb912bSLars Ellenberg 1964e4cbee9SChristoph Hellwig if (bio->bi_status) 197db830c46SAndreas Gruenbacher set_bit(__EE_WAS_ERROR, &peer_req->flags); 19845bb912bSLars Ellenberg 19945bb912bSLars Ellenberg bio_put(bio); /* no need for the bio anymore */ 200db830c46SAndreas Gruenbacher if (atomic_dec_and_test(&peer_req->pending_bios)) { 20145bb912bSLars Ellenberg if (is_write) 202db830c46SAndreas Gruenbacher drbd_endio_write_sec_final(peer_req); 20345bb912bSLars Ellenberg else 204db830c46SAndreas Gruenbacher drbd_endio_read_sec_final(peer_req); 20545bb912bSLars Ellenberg } 206b411b363SPhilipp Reisner } 207b411b363SPhilipp Reisner 2081ffa7bfaSBaoyou Xie static void 2091ffa7bfaSBaoyou Xie drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) 210142207f7SLars Ellenberg { 211142207f7SLars Ellenberg panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n", 212142207f7SLars Ellenberg device->minor, device->resource->name, device->vnr); 213142207f7SLars Ellenberg } 214142207f7SLars 
Ellenberg 215b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request 216b411b363SPhilipp Reisner */ 2174246a0b6SChristoph Hellwig void drbd_request_endio(struct bio *bio) 218b411b363SPhilipp Reisner { 219a115413dSLars Ellenberg unsigned long flags; 220b411b363SPhilipp Reisner struct drbd_request *req = bio->bi_private; 22184b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 222a115413dSLars Ellenberg struct bio_and_error m; 223b411b363SPhilipp Reisner enum drbd_req_event what; 2241b6dd252SPhilipp Reisner 2251b6dd252SPhilipp Reisner /* If this request was aborted locally before, 2261b6dd252SPhilipp Reisner * but now was completed "successfully", 2271b6dd252SPhilipp Reisner * chances are that this caused arbitrary data corruption. 2281b6dd252SPhilipp Reisner * 2291b6dd252SPhilipp Reisner * "aborting" requests, or force-detaching the disk, is intended for 2301b6dd252SPhilipp Reisner * completely blocked/hung local backing devices which do no longer 2311b6dd252SPhilipp Reisner * complete requests at all, not even do error completions. In this 2321b6dd252SPhilipp Reisner * situation, usually a hard-reset and failover is the only way out. 2331b6dd252SPhilipp Reisner * 2341b6dd252SPhilipp Reisner * By "aborting", basically faking a local error-completion, 2351b6dd252SPhilipp Reisner * we allow for a more graceful swichover by cleanly migrating services. 2361b6dd252SPhilipp Reisner * Still the affected node has to be rebooted "soon". 2371b6dd252SPhilipp Reisner * 2381b6dd252SPhilipp Reisner * By completing these requests, we allow the upper layers to re-use 2391b6dd252SPhilipp Reisner * the associated data pages. 
2401b6dd252SPhilipp Reisner * 2411b6dd252SPhilipp Reisner * If later the local backing device "recovers", and now DMAs some data 2421b6dd252SPhilipp Reisner * from disk into the original request pages, in the best case it will 2431b6dd252SPhilipp Reisner * just put random data into unused pages; but typically it will corrupt 2441b6dd252SPhilipp Reisner * meanwhile completely unrelated data, causing all sorts of damage. 2451b6dd252SPhilipp Reisner * 2461b6dd252SPhilipp Reisner * Which means delayed successful completion, 2471b6dd252SPhilipp Reisner * especially for READ requests, 2481b6dd252SPhilipp Reisner * is a reason to panic(). 2491b6dd252SPhilipp Reisner * 2501b6dd252SPhilipp Reisner * We assume that a delayed *error* completion is OK, 2511b6dd252SPhilipp Reisner * though we still will complain noisily about it. 2521b6dd252SPhilipp Reisner */ 2531b6dd252SPhilipp Reisner if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { 2541b6dd252SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 255d0180171SAndreas Gruenbacher drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 2561b6dd252SPhilipp Reisner 2574e4cbee9SChristoph Hellwig if (!bio->bi_status) 258142207f7SLars Ellenberg drbd_panic_after_delayed_completion_of_aborted_request(device); 2591b6dd252SPhilipp Reisner } 2601b6dd252SPhilipp Reisner 261b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 2624e4cbee9SChristoph Hellwig if (unlikely(bio->bi_status)) { 26370246286SChristoph Hellwig switch (bio_op(bio)) { 26445c21793SChristoph Hellwig case REQ_OP_WRITE_ZEROES: 26570246286SChristoph Hellwig case REQ_OP_DISCARD: 2664e4cbee9SChristoph Hellwig if (bio->bi_status == BLK_STS_NOTSUPP) 26770246286SChristoph Hellwig what = DISCARD_COMPLETED_NOTSUPP; 2682f632aebSLars Ellenberg else 26970246286SChristoph Hellwig what = DISCARD_COMPLETED_WITH_ERROR; 27070246286SChristoph Hellwig break; 27170246286SChristoph Hellwig case REQ_OP_READ: 2721eff9d32SJens Axboe 
if (bio->bi_opf & REQ_RAHEAD) 27370246286SChristoph Hellwig what = READ_AHEAD_COMPLETED_WITH_ERROR; 27470246286SChristoph Hellwig else 27570246286SChristoph Hellwig what = READ_COMPLETED_WITH_ERROR; 27670246286SChristoph Hellwig break; 27770246286SChristoph Hellwig default: 27870246286SChristoph Hellwig what = WRITE_COMPLETED_WITH_ERROR; 27970246286SChristoph Hellwig break; 28070246286SChristoph Hellwig } 28170246286SChristoph Hellwig } else { 2828554df1cSAndreas Gruenbacher what = COMPLETED_OK; 28370246286SChristoph Hellwig } 284b411b363SPhilipp Reisner 2854e4cbee9SChristoph Hellwig req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); 28664dafbc9SLars Ellenberg bio_put(bio); 287b411b363SPhilipp Reisner 288a115413dSLars Ellenberg /* not req_mod(), we need irqsave here! */ 2890500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 290a115413dSLars Ellenberg __req_mod(req, what, &m); 2910500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 292b30ab791SAndreas Gruenbacher put_ldev(device); 293a115413dSLars Ellenberg 294a115413dSLars Ellenberg if (m.bio) 295b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 296b411b363SPhilipp Reisner } 297b411b363SPhilipp Reisner 2989534d671SHerbert Xu void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest) 29945bb912bSLars Ellenberg { 3009534d671SHerbert Xu AHASH_REQUEST_ON_STACK(req, tfm); 30145bb912bSLars Ellenberg struct scatterlist sg; 302db830c46SAndreas Gruenbacher struct page *page = peer_req->pages; 30345bb912bSLars Ellenberg struct page *tmp; 30445bb912bSLars Ellenberg unsigned len; 30545bb912bSLars Ellenberg 3069534d671SHerbert Xu ahash_request_set_tfm(req, tfm); 3079534d671SHerbert Xu ahash_request_set_callback(req, 0, NULL, NULL); 30845bb912bSLars Ellenberg 30945bb912bSLars Ellenberg sg_init_table(&sg, 1); 3109534d671SHerbert Xu crypto_ahash_init(req); 31145bb912bSLars Ellenberg 31245bb912bSLars 
Ellenberg while ((tmp = page_chain_next(page))) { 31345bb912bSLars Ellenberg /* all but the last page will be fully used */ 31445bb912bSLars Ellenberg sg_set_page(&sg, page, PAGE_SIZE, 0); 3159534d671SHerbert Xu ahash_request_set_crypt(req, &sg, NULL, sg.length); 3169534d671SHerbert Xu crypto_ahash_update(req); 31745bb912bSLars Ellenberg page = tmp; 31845bb912bSLars Ellenberg } 31945bb912bSLars Ellenberg /* and now the last, possibly only partially used page */ 320db830c46SAndreas Gruenbacher len = peer_req->i.size & (PAGE_SIZE - 1); 32145bb912bSLars Ellenberg sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); 3229534d671SHerbert Xu ahash_request_set_crypt(req, &sg, digest, sg.length); 3239534d671SHerbert Xu crypto_ahash_finup(req); 3249534d671SHerbert Xu ahash_request_zero(req); 32545bb912bSLars Ellenberg } 32645bb912bSLars Ellenberg 3279534d671SHerbert Xu void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest) 328b411b363SPhilipp Reisner { 3299534d671SHerbert Xu AHASH_REQUEST_ON_STACK(req, tfm); 330b411b363SPhilipp Reisner struct scatterlist sg; 3317988613bSKent Overstreet struct bio_vec bvec; 3327988613bSKent Overstreet struct bvec_iter iter; 333b411b363SPhilipp Reisner 3349534d671SHerbert Xu ahash_request_set_tfm(req, tfm); 3359534d671SHerbert Xu ahash_request_set_callback(req, 0, NULL, NULL); 336b411b363SPhilipp Reisner 337b411b363SPhilipp Reisner sg_init_table(&sg, 1); 3389534d671SHerbert Xu crypto_ahash_init(req); 339b411b363SPhilipp Reisner 3407988613bSKent Overstreet bio_for_each_segment(bvec, bio, iter) { 3417988613bSKent Overstreet sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 3429534d671SHerbert Xu ahash_request_set_crypt(req, &sg, NULL, sg.length); 3439534d671SHerbert Xu crypto_ahash_update(req); 3449104d31aSLars Ellenberg /* REQ_OP_WRITE_SAME has only one segment, 3459104d31aSLars Ellenberg * checksum the payload only once. 
*/ 3469104d31aSLars Ellenberg if (bio_op(bio) == REQ_OP_WRITE_SAME) 3479104d31aSLars Ellenberg break; 348b411b363SPhilipp Reisner } 3499534d671SHerbert Xu ahash_request_set_crypt(req, NULL, digest, 0); 3509534d671SHerbert Xu crypto_ahash_final(req); 3519534d671SHerbert Xu ahash_request_zero(req); 352b411b363SPhilipp Reisner } 353b411b363SPhilipp Reisner 3549676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */ 35599920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel) 356b411b363SPhilipp Reisner { 357a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 3586780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 3596780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 360b411b363SPhilipp Reisner int digest_size; 361b411b363SPhilipp Reisner void *digest; 36299920dc5SAndreas Gruenbacher int err = 0; 363b411b363SPhilipp Reisner 36453ea4331SLars Ellenberg if (unlikely(cancel)) 36553ea4331SLars Ellenberg goto out; 366b411b363SPhilipp Reisner 3679676c760SLars Ellenberg if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) 36853ea4331SLars Ellenberg goto out; 36953ea4331SLars Ellenberg 3709534d671SHerbert Xu digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm); 371b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 372b411b363SPhilipp Reisner if (digest) { 373db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 374db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 3756780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 3769676c760SLars Ellenberg /* Free peer_req and pages before send. 
37753ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 37853ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 37953ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 380c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 381b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 382db830c46SAndreas Gruenbacher peer_req = NULL; 383b30ab791SAndreas Gruenbacher inc_rs_pending(device); 3846780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, 38553ea4331SLars Ellenberg digest, digest_size, 386b411b363SPhilipp Reisner P_CSUM_RS_REQUEST); 387b411b363SPhilipp Reisner kfree(digest); 388b411b363SPhilipp Reisner } else { 389d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc() of digest failed.\n"); 39099920dc5SAndreas Gruenbacher err = -ENOMEM; 391b411b363SPhilipp Reisner } 392b411b363SPhilipp Reisner 39353ea4331SLars Ellenberg out: 394db830c46SAndreas Gruenbacher if (peer_req) 395b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 396b411b363SPhilipp Reisner 39799920dc5SAndreas Gruenbacher if (unlikely(err)) 398d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); 39999920dc5SAndreas Gruenbacher return err; 400b411b363SPhilipp Reisner } 401b411b363SPhilipp Reisner 402b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 403b411b363SPhilipp Reisner 40469a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) 405b411b363SPhilipp Reisner { 40669a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 407db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 408b411b363SPhilipp Reisner 409b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 41080a40e43SLars Ellenberg return -EIO; 411b411b363SPhilipp Reisner 412b411b363SPhilipp Reisner /* GFP_TRY, because if 
there is no memory available right now, this may 413b411b363SPhilipp Reisner * be rescheduled for later. It is "only" background resync, after all. */ 41469a22773SAndreas Gruenbacher peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 4159104d31aSLars Ellenberg size, size, GFP_TRY); 416db830c46SAndreas Gruenbacher if (!peer_req) 41780a40e43SLars Ellenberg goto defer; 418b411b363SPhilipp Reisner 419a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_send_csum; 4200500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 421b9ed7080SLars Ellenberg list_add_tail(&peer_req->w.list, &device->read_ee); 4220500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 423b411b363SPhilipp Reisner 424b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 425bb3cc85eSMike Christie if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, 426bb3cc85eSMike Christie DRBD_FAULT_RS_RD) == 0) 42780a40e43SLars Ellenberg return 0; 42845bb912bSLars Ellenberg 42910f6d992SLars Ellenberg /* If it failed because of ENOMEM, retry should help. If it failed 43010f6d992SLars Ellenberg * because bio_add_page failed (probably broken lower level driver), 43110f6d992SLars Ellenberg * retry may or may not help. 43210f6d992SLars Ellenberg * If it does not, you may need to force disconnect. 
*/ 4330500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 434a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 4350500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 43622cc37a9SLars Ellenberg 437b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 43880a40e43SLars Ellenberg defer: 439b30ab791SAndreas Gruenbacher put_ldev(device); 44080a40e43SLars Ellenberg return -EAGAIN; 441b411b363SPhilipp Reisner } 442b411b363SPhilipp Reisner 44399920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel) 444794abb75SPhilipp Reisner { 44584b8c06bSAndreas Gruenbacher struct drbd_device *device = 44684b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, resync_work); 44784b8c06bSAndreas Gruenbacher 448b30ab791SAndreas Gruenbacher switch (device->state.conn) { 449794abb75SPhilipp Reisner case C_VERIFY_S: 450d448a2e1SAndreas Gruenbacher make_ov_request(device, cancel); 451794abb75SPhilipp Reisner break; 452794abb75SPhilipp Reisner case C_SYNC_TARGET: 453d448a2e1SAndreas Gruenbacher make_resync_request(device, cancel); 454794abb75SPhilipp Reisner break; 455794abb75SPhilipp Reisner } 456794abb75SPhilipp Reisner 45799920dc5SAndreas Gruenbacher return 0; 458794abb75SPhilipp Reisner } 459794abb75SPhilipp Reisner 4602bccef39SKees Cook void resync_timer_fn(struct timer_list *t) 461b411b363SPhilipp Reisner { 4622bccef39SKees Cook struct drbd_device *device = from_timer(device, t, resync_timer); 463b411b363SPhilipp Reisner 46415e26f6aSLars Ellenberg drbd_queue_work_if_unqueued( 46515e26f6aSLars Ellenberg &first_peer_device(device)->connection->sender_work, 46684b8c06bSAndreas Gruenbacher &device->resync_work); 467b411b363SPhilipp Reisner } 468b411b363SPhilipp Reisner 469778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value) 470778f271dSPhilipp Reisner { 471778f271dSPhilipp Reisner int i; 472778f271dSPhilipp Reisner 473778f271dSPhilipp Reisner for (i = 0; i < fb->size; 
i++) 474f10f2623SPhilipp Reisner fb->values[i] = value; 475778f271dSPhilipp Reisner } 476778f271dSPhilipp Reisner 477778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value) 478778f271dSPhilipp Reisner { 479778f271dSPhilipp Reisner int ov; 480778f271dSPhilipp Reisner 481778f271dSPhilipp Reisner ov = fb->values[fb->head_index]; 482778f271dSPhilipp Reisner fb->values[fb->head_index++] = value; 483778f271dSPhilipp Reisner 484778f271dSPhilipp Reisner if (fb->head_index >= fb->size) 485778f271dSPhilipp Reisner fb->head_index = 0; 486778f271dSPhilipp Reisner 487778f271dSPhilipp Reisner return ov; 488778f271dSPhilipp Reisner } 489778f271dSPhilipp Reisner 490778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value) 491778f271dSPhilipp Reisner { 492778f271dSPhilipp Reisner int i; 493778f271dSPhilipp Reisner 494778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 495778f271dSPhilipp Reisner fb->values[i] += value; 496778f271dSPhilipp Reisner } 497778f271dSPhilipp Reisner 4989958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size) 4999958c857SPhilipp Reisner { 5009958c857SPhilipp Reisner struct fifo_buffer *fb; 5019958c857SPhilipp Reisner 5028747d30aSLars Ellenberg fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); 5039958c857SPhilipp Reisner if (!fb) 5049958c857SPhilipp Reisner return NULL; 5059958c857SPhilipp Reisner 5069958c857SPhilipp Reisner fb->head_index = 0; 5079958c857SPhilipp Reisner fb->size = fifo_size; 5089958c857SPhilipp Reisner fb->total = 0; 5099958c857SPhilipp Reisner 5109958c857SPhilipp Reisner return fb; 5119958c857SPhilipp Reisner } 5129958c857SPhilipp Reisner 5130e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 514778f271dSPhilipp Reisner { 515daeda1ccSPhilipp Reisner struct disk_conf *dc; 5167f34f614SLars Ellenberg unsigned int want; /* The number of sectors we want in-flight */ 517778f271dSPhilipp Reisner 
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in-flight */
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		/* either aim for a fixed in-flight fill level
		 * (c-fill-target), or derive the target from the measured
		 * throughput and the configured delay (c-delay-target) */
		want = dc->c_fill_target ?
			dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	/* never exceed the configured maximum rate (c-max-rate) */
	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

/* Translate either the dynamic controller output or the static
 * resync-rate setting into the number of BM_BLOCK_SIZE requests to
 * issue this interval, updating the effective c_sync_rate as we go,
 * and cap by half of max-buffers (see comments below). */
static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	/* consume the counter atomically; it is fed by the reply path */
	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		/* dynamic controller enabled (c-plan-ahead > 0) */
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		/* fixed resync rate */
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte).
 */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

/* Generate up to drbd_rs_number_requests() resync requests, walking the
 * out-of-sync bitmap from device->bm_resync_fo.  Depending on
 * configuration this issues local checksum reads (read_for_csum), thin
 * resync probes (P_RS_THIN_REQ) or plain P_RS_DATA_REQUESTs.  On
 * back-pressure (send buffer half full, busy sectors) it re-arms the
 * resync timer and returns; returns 0 or a negative error. */
static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			/* no more out-of-sync bits: nothing left to request */
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			/* sector busy: remember position and retry later */
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			/* the bit was cleared meanwhile: nothing to do */
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			/* thin resync: a request of exactly the discard
			 * granularity is sent as P_RS_THIN_REQ, so the peer
			 * may answer "all zero" instead of shipping data
			 * (see w_e_end_rsdata_req) */
			err = drbd_send_drequest(peer_device,
				 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
				 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

/* Generate up to drbd_rs_number_requests() online-verify requests
 * starting at device->ov_position.  Stops early once the configured
 * stop sector is reached, but always sends at least one request.
 * Re-arms the resync timer unless the stop sector ended the run. */
static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			/* sector busy: remember position and retry later */
			device->ov_position = sector;
			goto requeue;
		}

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

/* Worker callback queued when an online-verify run is over: print the
 * final out-of-sync statistics, finish the resync state machine, and
 * free the drbd_device_work that carried this callback. */
int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

/* Worker callback used to retry drbd_resync_finished() from the sender
 * work queue; queued by drbd_resync_finished() itself when the resync
 * LRU could not be emptied right away.  Frees its device_work. */
static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

/* Send a ping to the peer and block until the ping-ack arrives or the
 * connection state drops below C_CONNECTED. */
static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

/* Finish a resync or online-verify run: drain the resync LRU (retrying
 * via w_resync_finished if necessary), log throughput statistics,
 * update disk states and UUIDs under the req_lock, transition back to
 * C_CONNECTED, and possibly invoke the user-space helpers
 * "out-of-sync", "after-resync-target" and "unfence-peer".
 * Always returns 1. */
int drbd_resync_finished(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	/* elapsed wall time in seconds, paused time excluded; dt is
	 * unsigned, so the check below only guards the db/dt division
	 * against zero */
	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	/* round-trip to the peer before reading the final state
	 * (NOTE(review): presumably to let in-flight acks settle —
	 * confirm against the protocol documentation) */
	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			/* integer percentage; the second form avoids
			 * overflowing s*100 for very large t */
			const int ratio =
				(t == 0)     ? 0 :
			(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

/* helper */
/* If the network layer may still hold references to the request's
 * pages (sendpage not finished), park the request on net_ee and move
 * the page accounting from pp_in_use to pp_in_use_by_net; otherwise
 * free it immediately. */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
1064b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1065b411b363SPhilipp Reisner */ 106699920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel) 1067b411b363SPhilipp Reisner { 1068a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 10696780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 10706780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 107199920dc5SAndreas Gruenbacher int err; 1072b411b363SPhilipp Reisner 1073b411b363SPhilipp Reisner if (unlikely(cancel)) { 1074b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1075b30ab791SAndreas Gruenbacher dec_unacked(device); 107699920dc5SAndreas Gruenbacher return 0; 1077b411b363SPhilipp Reisner } 1078b411b363SPhilipp Reisner 1079db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 10806780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); 1081b411b363SPhilipp Reisner } else { 1082b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1083d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. 
sector=%llus.\n", 1084db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 1085b411b363SPhilipp Reisner 10866780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); 1087b411b363SPhilipp Reisner } 1088b411b363SPhilipp Reisner 1089b30ab791SAndreas Gruenbacher dec_unacked(device); 1090b411b363SPhilipp Reisner 1091b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1092b411b363SPhilipp Reisner 109399920dc5SAndreas Gruenbacher if (unlikely(err)) 1094d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block() failed\n"); 109599920dc5SAndreas Gruenbacher return err; 1096b411b363SPhilipp Reisner } 1097b411b363SPhilipp Reisner 1098700ca8c0SPhilipp Reisner static bool all_zero(struct drbd_peer_request *peer_req) 1099700ca8c0SPhilipp Reisner { 1100700ca8c0SPhilipp Reisner struct page *page = peer_req->pages; 1101700ca8c0SPhilipp Reisner unsigned int len = peer_req->i.size; 1102700ca8c0SPhilipp Reisner 1103700ca8c0SPhilipp Reisner page_chain_for_each(page) { 1104700ca8c0SPhilipp Reisner unsigned int l = min_t(unsigned int, len, PAGE_SIZE); 1105700ca8c0SPhilipp Reisner unsigned int i, words = l / sizeof(long); 1106700ca8c0SPhilipp Reisner unsigned long *d; 1107700ca8c0SPhilipp Reisner 1108700ca8c0SPhilipp Reisner d = kmap_atomic(page); 1109700ca8c0SPhilipp Reisner for (i = 0; i < words; i++) { 1110700ca8c0SPhilipp Reisner if (d[i]) { 1111700ca8c0SPhilipp Reisner kunmap_atomic(d); 1112700ca8c0SPhilipp Reisner return false; 1113700ca8c0SPhilipp Reisner } 1114700ca8c0SPhilipp Reisner } 1115700ca8c0SPhilipp Reisner kunmap_atomic(d); 1116700ca8c0SPhilipp Reisner len -= l; 1117700ca8c0SPhilipp Reisner } 1118700ca8c0SPhilipp Reisner 1119700ca8c0SPhilipp Reisner return true; 1120700ca8c0SPhilipp Reisner } 1121700ca8c0SPhilipp Reisner 1122b411b363SPhilipp Reisner /** 1123a209b4aeSAndreas Gruenbacher * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST 
 * @w: work object.
 * @cancel: The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	/* Canceled: release the request, drop the unacked count, send nothing. */
	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* Mark the resync extent's IO as complete; D_FAILED is enough here
	 * since we only touch resync-lru bookkeeping, not the data itself. */
	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		/* In AHEAD mode we do not serve resync requests; cancel it. */
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(device);
			/* For thin resync requests whose payload is all zero,
			 * send a compact "deallocated" notification instead of
			 * the full data block. */
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			/* Peer disk is gone; nothing useful to send. */
			if (__ratelimit(&drbd_ratelimit_state))
				drbd_err(device, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		/* Local read failed: negative resync reply, and account the
		 * sectors as failed resync IO. */
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

/*
 * w_e_end_csum_rs_req() - Worker callback for a checksum-based resync request.
 *
 * Recomputes the checksum over the locally read block and compares it with
 * the digest the peer sent along (peer_req->digest).  On a match only a
 * P_RS_IS_IN_SYNC ack goes out; otherwise the full block is sent.
 */
int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (peer_device->connection->csums_tfm) {
			digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
			D_ASSERT(device, digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		/* If kmalloc failed, eq stays 0 and we fall through to
		 * sending the full block — correctness over bandwidth. */
		if (digest) {
			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(device);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(device);
	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block/ack() failed\n");
	return err;
}

/*
 * w_e_end_ov_req() - Worker callback for an online-verify request.
 *
 * Computes the verify digest over the locally read block and sends it to the
 * peer as P_OV_REPLY.  A read error results in an all-zero digest being sent.
 * Returns non-zero on failure, which terminates the connection.
 */
int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	/* Capture sector/size now; peer_req is freed before the send below. */
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	peer_req = NULL;
	inc_rs_pending(device);
	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(device);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);
	dec_unacked(device);
	return err;
}

/*
 * drbd_ov_out_of_sync_found() - Record an out-of-sync range found by verify.
 *
 * Extends the current out-of-sync run if @sector is contiguous with it,
 * otherwise starts a new run; sizes are tracked in 512-byte sectors
 * (hence size >> 9).  Also marks the range out of sync in the bitmap.
 */
void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
{
	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
		device->ov_last_oos_size += size>>9;
	} else {
		device->ov_last_oos_start = sector;
		device->ov_last_oos_size = size>>9;
	}
	drbd_set_out_of_sync(device, sector, size);
}

/*
 * w_e_end_ov_reply() - Worker callback handling the peer's P_OV_REPLY digest.
 *
 * Compares the peer's digest with one computed over the local block and
 * reports the per-block verify result (in sync / out of sync) back via
 * P_OV_RESULT.  Finishes the verify run when all blocks are done or the
 * configured stop sector is reached.
 */
int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;
	bool stop_sector_reached = false;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		/* On allocation failure eq stays 0, so the block is treated
		 * as out of sync — conservative but safe. */
		if (digest) {
			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);

			D_ASSERT(device, digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	stop_sector_reached = verify_can_do_stop_sector(device) &&
		(sector + (size>>9)) >= device->ov_stop_sector;

	if (device->ov_left == 0 || stop_sector_reached) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
	}

	return err;
}

/* FIXME
 * We need to track the number of pending barrier acks,
 * and to be able to wait for them.
 * See also comment in drbd_adm_attach before drbd_suspend_io.
 */
/*
 * drbd_send_barrier() - Close the current write epoch by sending P_BARRIER.
 *
 * Resets the per-epoch write counter and records when the barrier was sent.
 * Returns 0 on success, -EIO if the command buffer cannot be prepared.
 */
static int drbd_send_barrier(struct drbd_connection *connection)
{
	struct p_barrier *p;
	struct drbd_socket *sock;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	p->barrier = connection->send.current_epoch_nr;
	p->pad = 0;
	/* The barrier closes the epoch: start counting writes from zero. */
	connection->send.current_epoch_writes = 0;
	connection->send.last_sent_barrier_jif = jiffies;

	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}

/* Send a P_UNPLUG_REMOTE hint on the data socket; -EIO if it cannot be
 * prepared. */
static int pd_send_unplug_remote(struct drbd_peer_device *pd)
{
	struct drbd_socket *sock = &pd->connection->data;
	if (!drbd_prepare_command(pd, sock))
		return -EIO;
	return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
}

/* Worker callback: forward the unplug hint to the (first) peer device. */
int w_send_write_hint(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, unplug_work);

	if (cancel)
		return 0;
	return pd_send_unplug_remote(first_peer_device(device));
}

/* Initialize the epoch bookkeeping on the very first write sent over this
 * connection; no-op afterwards. */
static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
{
	if (!connection->send.seen_any_write_yet) {
		connection->send.seen_any_write_yet = true;
		connection->send.current_epoch_nr = epoch;
		connection->send.current_epoch_writes = 0;
		connection->send.last_sent_barrier_jif = jiffies;
	}
}

/* If @epoch differs from the epoch currently being sent, close the old one
 * with a barrier (only if it actually contained writes) and switch over. */
static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
{
	/* re-init if first write on this connection */
	if (!connection->send.seen_any_write_yet)
		return;
	if (connection->send.current_epoch_nr != epoch) {
		if (connection->send.current_epoch_writes)
			drbd_send_barrier(connection);
		connection->send.current_epoch_nr = epoch;
	}
}

/*
 * w_send_out_of_sync() - Worker callback to send a P_OUT_OF_SYNC packet.
 *
 * Used in AHEAD mode: instead of replicating the write, only tell the peer
 * which range became out of sync.
 */
int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* this time, no connection->send.current_epoch_writes++;
	 * If it was sent, it was the closing barrier for the last
	 * replicated epoch, before we went into AHEAD mode.
	 * No more barriers will be sent, until we leave AHEAD mode again. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_out_of_sync(peer_device, req);
	req_mod(req, OOS_HANDED_TO_NETWORK);

	return err;
}

/**
 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
 * @w: work object.
 * @cancel: The connection will be closed anyways
 */
int w_send_dblock(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	/* Remember the unplug flag before req_mod() may change rq_state. */
	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* Order matters: initialize epoch tracking on the first-ever write,
	 * possibly close the previous epoch, then count this write into the
	 * current one. */
	re_init_if_first_write(connection, req->epoch);
	maybe_send_barrier(connection, req->epoch);
	connection->send.current_epoch_writes++;

	err = drbd_send_dblock(peer_device, req);
	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);

	/* Follow a successfully sent block with an unplug hint if requested. */
	if (do_send_unplug && !err)
		pd_send_unplug_remote(peer_device);

	return err;
}

/**
 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
 * @w: work object.
 * @cancel: The connection will be closed anyways
 */
int w_send_read_req(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	/* Remember the unplug flag before req_mod() may change rq_state. */
	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* Even read requests may close a write epoch,
	 * if there was any yet. */
	maybe_send_barrier(connection, req->epoch);

	/* The request pointer doubles as the block_id the peer echoes back. */
	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
				 (unsigned long)req);

	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);

	if (do_send_unplug && !err)
		pd_send_unplug_remote(peer_device);

	return err;
}

/*
 * w_restart_disk_io() - Worker callback to resubmit a request to local disk.
 *
 * Re-activates the AL extent for writes that were in the activity log, then
 * rebuilds the private bio from the master bio and submits it to the backing
 * device.
 */
int w_restart_disk_io(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;

	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_begin_io(device, &req->i);

	drbd_req_make_private_bio(req, req->master_bio);
	bio_set_dev(req->private_bio, device->ldev->backing_bdev);
	generic_make_request(req->private_bio);

	return 0;
}

/*
 * _drbd_may_sync_now() - May @device resync right now?
 *
 * Follows the resync-after dependency chain.  Returns 0 if any device we
 * depend on is itself syncing or paused, 1 if the chain ends without such a
 * device.  Loop termination relies on drbd_resync_after_valid() having
 * rejected dependency cycles — TODO confirm; enforcement is in
 * drbd_resync_after_valid() below.
 */
static int _drbd_may_sync_now(struct drbd_device *device)
{
	struct drbd_device *odev = device;
	int resync_after;

	while (1) {
		/* Chain cannot be followed past a detached/diskless minor. */
		if (!odev->ldev || odev->state.disk == D_DISKLESS)
			return 1;
		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		if (resync_after == -1)
			return 1;
		odev = minor_to_device(resync_after);
		if (!odev)
			return 1;
		/* Blocked if the dependency is actively syncing or paused. */
		if ((odev->state.conn >= C_SYNC_SOURCE &&
		     odev->state.conn <= C_PAUSED_SYNC_T) ||
		    odev->state.aftr_isp || odev->state.peer_isp ||
		    odev->state.user_isp)
			return 0;
	}
}

/**
 * drbd_pause_after() - Pause resync on all devices that may not resync now
 * @device: DRBD device.
 *
 * Called from process context only (admin command and after_state_ch).
 */
static bool drbd_pause_after(struct drbd_device *device)
{
	bool changed = false;
	struct drbd_device *odev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, odev, i) {
		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
			continue;
		/* Set aftr_isp on every device whose dependency chain forbids
		 * syncing; report whether any state actually changed. */
		if (!_drbd_may_sync_now(odev) &&
		    _drbd_set_state(_NS(odev, aftr_isp, 1),
				    CS_HARD, NULL) != SS_NOTHING_TO_DO)
			changed = true;
	}
	rcu_read_unlock();

	return changed;
}

/**
 * drbd_resume_next() - Resume resync on all
devices that may resync now 1597b30ab791SAndreas Gruenbacher * @device: DRBD device. 1598b411b363SPhilipp Reisner * 1599b411b363SPhilipp Reisner * Called from process context only (admin command and worker). 1600b411b363SPhilipp Reisner */ 160128bc3b8cSAndreas Gruenbacher static bool drbd_resume_next(struct drbd_device *device) 1602b411b363SPhilipp Reisner { 160328bc3b8cSAndreas Gruenbacher bool changed = false; 160454761697SAndreas Gruenbacher struct drbd_device *odev; 160528bc3b8cSAndreas Gruenbacher int i; 1606b411b363SPhilipp Reisner 1607695d08faSPhilipp Reisner rcu_read_lock(); 160805a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1609b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1610b411b363SPhilipp Reisner continue; 1611b411b363SPhilipp Reisner if (odev->state.aftr_isp) { 161228bc3b8cSAndreas Gruenbacher if (_drbd_may_sync_now(odev) && 161328bc3b8cSAndreas Gruenbacher _drbd_set_state(_NS(odev, aftr_isp, 0), 161428bc3b8cSAndreas Gruenbacher CS_HARD, NULL) != SS_NOTHING_TO_DO) 161528bc3b8cSAndreas Gruenbacher changed = true; 1616b411b363SPhilipp Reisner } 1617b411b363SPhilipp Reisner } 1618695d08faSPhilipp Reisner rcu_read_unlock(); 161928bc3b8cSAndreas Gruenbacher return changed; 1620b411b363SPhilipp Reisner } 1621b411b363SPhilipp Reisner 1622b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device) 1623b411b363SPhilipp Reisner { 162428bc3b8cSAndreas Gruenbacher lock_all_resources(); 162528bc3b8cSAndreas Gruenbacher drbd_resume_next(device); 162628bc3b8cSAndreas Gruenbacher unlock_all_resources(); 1627b411b363SPhilipp Reisner } 1628b411b363SPhilipp Reisner 1629b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device) 1630b411b363SPhilipp Reisner { 163128bc3b8cSAndreas Gruenbacher lock_all_resources(); 163228bc3b8cSAndreas Gruenbacher drbd_pause_after(device); 163328bc3b8cSAndreas Gruenbacher unlock_all_resources(); 1634b411b363SPhilipp Reisner } 

/* caller must lock_all_resources() */
/*
 * drbd_resync_after_valid() - Validate a proposed resync-after dependency.
 *
 * @o_minor: minor this device wants to resync after; -1 means "no dependency".
 *
 * Returns NO_ERROR, ERR_RESYNC_AFTER for an out-of-range minor, or
 * ERR_RESYNC_AFTER_CYCLE if following the chain would loop back to @device.
 */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
	struct drbd_device *odev;
	int resync_after;

	if (o_minor == -1)
		return NO_ERROR;
	if (o_minor < -1 || o_minor > MINORMASK)
		return ERR_RESYNC_AFTER;

	/* check for loops */
	odev = minor_to_device(o_minor);
	while (1) {
		if (odev == device)
			return ERR_RESYNC_AFTER_CYCLE;

		/* You are free to depend on diskless, non-existing,
		 * or not yet/no longer existing minors.
		 * We only reject dependency loops.
		 * We cannot follow the dependency chain beyond a detached or
		 * missing minor.
		 */
		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
			return NO_ERROR;

		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		/* dependency chain ends here, no cycles. */
		if (resync_after == -1)
			return NO_ERROR;

		/* follow the dependency chain */
		odev = minor_to_device(resync_after);
	}
}

/* caller must lock_all_resources() */
/* Re-evaluate pause/resume across all devices until no state changes —
 * a changed dependency can ripple through the chain. */
void drbd_resync_after_changed(struct drbd_device *device)
{
	int changed;

	do {
		changed = drbd_pause_after(device);
		changed |= drbd_resume_next(device);
	} while (changed);
}

/*
 * drbd_rs_controller_reset() - Reset the resync-speed controller state.
 *
 * Zeroes the in-flight/section counters, snapshots the backing disk's sector
 * statistics as the new baseline, and empties the rate-plan fifo.
 */
void drbd_rs_controller_reset(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	struct fifo_buffer *plan;

	atomic_set(&device->rs_sect_in, 0);
	atomic_set(&device->rs_sect_ev, 0);
	device->rs_in_flight = 0;
	device->rs_last_events =
		(int)part_stat_read(&disk->part0, sectors[0]) +
		(int)part_stat_read(&disk->part0, sectors[1]);

	/* Updating the RCU protected object in place is necessary since
	   this function gets called from atomic context.
	   It is valid since all other updates also lead to an completely
	   empty fifo */
	rcu_read_lock();
	plan = rcu_dereference(device->rs_plan_s);
	plan->total = 0;
	fifo_set(plan, 0);
	rcu_read_unlock();
}

/* Timer callback: defer resync start to the device work queue. */
void start_resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, start_resync_timer);
	drbd_device_post_work(device, RS_START);
}

/* Start a resync as sync source, or re-arm the timer (HZ/10) while acks or
 * pending resync replies are still outstanding. */
static void do_start_resync(struct drbd_device *device)
{
	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
		drbd_warn(device, "postponing start_resync ...\n");
		device->start_resync_timer.expires = jiffies + HZ/10;
		add_timer(&device->start_resync_timer);
		return;
	}

	drbd_start_resync(device, C_SYNC_SOURCE);
	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
}

/* Checksum-based resync needs protocol >= 89 and a configured csums_tfm;
 * additionally honor the csums-after-crash-only setting. */
static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
{
	bool csums_after_crash_only;
	rcu_read_lock();
	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
	rcu_read_unlock();
	return connection->agreed_pro_version >= 89 &&		/* supported? */
		connection->csums_tfm &&			/* configured? */
		(csums_after_crash_only == false		/* use for each resync? */
		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
}

/**
 * drbd_start_resync() - Start the resync process
 * @device: DRBD device.
 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ?
peer_device->connection : NULL; 1751b411b363SPhilipp Reisner union drbd_state ns; 1752b411b363SPhilipp Reisner int r; 1753b411b363SPhilipp Reisner 1754b30ab791SAndreas Gruenbacher if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { 1755d0180171SAndreas Gruenbacher drbd_err(device, "Resync already running!\n"); 1756b411b363SPhilipp Reisner return; 1757b411b363SPhilipp Reisner } 1758b411b363SPhilipp Reisner 1759d3d2948fSRoland Kammerer if (!connection) { 1760d3d2948fSRoland Kammerer drbd_err(device, "No connection to peer, aborting!\n"); 1761d3d2948fSRoland Kammerer return; 1762d3d2948fSRoland Kammerer } 1763d3d2948fSRoland Kammerer 1764b30ab791SAndreas Gruenbacher if (!test_bit(B_RS_H_DONE, &device->flags)) { 1765b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) { 1766b411b363SPhilipp Reisner /* Since application IO was locked out during C_WF_BITMAP_T and 1767b411b363SPhilipp Reisner C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 1768b411b363SPhilipp Reisner we check that we might make the data inconsistent. */ 1769b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-target"); 1770b411b363SPhilipp Reisner r = (r >> 8) & 0xff; 1771b411b363SPhilipp Reisner if (r > 0) { 1772d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-target handler returned %d, " 1773b411b363SPhilipp Reisner "dropping connection.\n", r); 177444a4d551SLars Ellenberg conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 1775b411b363SPhilipp Reisner return; 1776b411b363SPhilipp Reisner } 177709b9e797SPhilipp Reisner } else /* C_SYNC_SOURCE */ { 1778b30ab791SAndreas Gruenbacher r = drbd_khelper(device, "before-resync-source"); 177909b9e797SPhilipp Reisner r = (r >> 8) & 0xff; 178009b9e797SPhilipp Reisner if (r > 0) { 178109b9e797SPhilipp Reisner if (r == 3) { 1782d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 178309b9e797SPhilipp Reisner "ignoring. 
Old userland tools?", r); 178409b9e797SPhilipp Reisner } else { 1785d0180171SAndreas Gruenbacher drbd_info(device, "before-resync-source handler returned %d, " 178609b9e797SPhilipp Reisner "dropping connection.\n", r); 178744a4d551SLars Ellenberg conn_request_state(connection, 1788a6b32bc3SAndreas Gruenbacher NS(conn, C_DISCONNECTING), CS_HARD); 178909b9e797SPhilipp Reisner return; 179009b9e797SPhilipp Reisner } 179109b9e797SPhilipp Reisner } 1792b411b363SPhilipp Reisner } 1793e64a3294SPhilipp Reisner } 1794b411b363SPhilipp Reisner 179544a4d551SLars Ellenberg if (current == connection->worker.task) { 1796dad20554SPhilipp Reisner /* The worker should not sleep waiting for state_mutex, 1797e64a3294SPhilipp Reisner that can take long */ 1798b30ab791SAndreas Gruenbacher if (!mutex_trylock(device->state_mutex)) { 1799b30ab791SAndreas Gruenbacher set_bit(B_RS_H_DONE, &device->flags); 1800b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ/5; 1801b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 1802e64a3294SPhilipp Reisner return; 1803e64a3294SPhilipp Reisner } 1804e64a3294SPhilipp Reisner } else { 1805b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 1806e64a3294SPhilipp Reisner } 1807b411b363SPhilipp Reisner 180828bc3b8cSAndreas Gruenbacher lock_all_resources(); 180928bc3b8cSAndreas Gruenbacher clear_bit(B_RS_H_DONE, &device->flags); 1810a700471bSPhilipp Reisner /* Did some connection breakage or IO error race with us? 
*/ 1811b30ab791SAndreas Gruenbacher if (device->state.conn < C_CONNECTED 1812b30ab791SAndreas Gruenbacher || !get_ldev_if_state(device, D_NEGOTIATING)) { 181328bc3b8cSAndreas Gruenbacher unlock_all_resources(); 181428bc3b8cSAndreas Gruenbacher goto out; 1815b411b363SPhilipp Reisner } 1816b411b363SPhilipp Reisner 1817b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1818b411b363SPhilipp Reisner 1819b30ab791SAndreas Gruenbacher ns.aftr_isp = !_drbd_may_sync_now(device); 1820b411b363SPhilipp Reisner 1821b411b363SPhilipp Reisner ns.conn = side; 1822b411b363SPhilipp Reisner 1823b411b363SPhilipp Reisner if (side == C_SYNC_TARGET) 1824b411b363SPhilipp Reisner ns.disk = D_INCONSISTENT; 1825b411b363SPhilipp Reisner else /* side == C_SYNC_SOURCE */ 1826b411b363SPhilipp Reisner ns.pdsk = D_INCONSISTENT; 1827b411b363SPhilipp Reisner 182828bc3b8cSAndreas Gruenbacher r = _drbd_set_state(device, ns, CS_VERBOSE, NULL); 1829b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 1830b411b363SPhilipp Reisner 1831b411b363SPhilipp Reisner if (ns.conn < C_CONNECTED) 1832b411b363SPhilipp Reisner r = SS_UNKNOWN_ERROR; 1833b411b363SPhilipp Reisner 1834b411b363SPhilipp Reisner if (r == SS_SUCCESS) { 1835b30ab791SAndreas Gruenbacher unsigned long tw = drbd_bm_total_weight(device); 18361d7734a0SLars Ellenberg unsigned long now = jiffies; 18371d7734a0SLars Ellenberg int i; 18381d7734a0SLars Ellenberg 1839b30ab791SAndreas Gruenbacher device->rs_failed = 0; 1840b30ab791SAndreas Gruenbacher device->rs_paused = 0; 1841b30ab791SAndreas Gruenbacher device->rs_same_csum = 0; 1842b30ab791SAndreas Gruenbacher device->rs_last_sect_ev = 0; 1843b30ab791SAndreas Gruenbacher device->rs_total = tw; 1844b30ab791SAndreas Gruenbacher device->rs_start = now; 18451d7734a0SLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 1846b30ab791SAndreas Gruenbacher device->rs_mark_left[i] = tw; 1847b30ab791SAndreas Gruenbacher device->rs_mark_time[i] = now; 18481d7734a0SLars Ellenberg } 184928bc3b8cSAndreas 
Gruenbacher drbd_pause_after(device); 18505ab7d2c0SLars Ellenberg /* Forget potentially stale cached per resync extent bit-counts. 18515ab7d2c0SLars Ellenberg * Open coded drbd_rs_cancel_all(device), we already have IRQs 18525ab7d2c0SLars Ellenberg * disabled, and know the disk state is ok. */ 18535ab7d2c0SLars Ellenberg spin_lock(&device->al_lock); 18545ab7d2c0SLars Ellenberg lc_reset(device->resync); 18555ab7d2c0SLars Ellenberg device->resync_locked = 0; 18565ab7d2c0SLars Ellenberg device->resync_wenr = LC_FREE; 18575ab7d2c0SLars Ellenberg spin_unlock(&device->al_lock); 1858b411b363SPhilipp Reisner } 185928bc3b8cSAndreas Gruenbacher unlock_all_resources(); 18605a22db89SLars Ellenberg 18616c922ed5SLars Ellenberg if (r == SS_SUCCESS) { 18625ab7d2c0SLars Ellenberg wake_up(&device->al_wait); /* for lc_reset() above */ 1863328e0f12SPhilipp Reisner /* reset rs_last_bcast when a resync or verify is started, 1864328e0f12SPhilipp Reisner * to deal with potential jiffies wrap. */ 1865b30ab791SAndreas Gruenbacher device->rs_last_bcast = jiffies - HZ; 1866328e0f12SPhilipp Reisner 1867d0180171SAndreas Gruenbacher drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 18686c922ed5SLars Ellenberg drbd_conn_str(ns.conn), 1869b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), 1870b30ab791SAndreas Gruenbacher (unsigned long) device->rs_total); 1871aaaba345SLars Ellenberg if (side == C_SYNC_TARGET) { 1872b30ab791SAndreas Gruenbacher device->bm_resync_fo = 0; 1873aaaba345SLars Ellenberg device->use_csums = use_checksum_based_resync(connection, device); 1874aaaba345SLars Ellenberg } else { 18757e5fec31SFabian Frederick device->use_csums = false; 1876aaaba345SLars Ellenberg } 18775a22db89SLars Ellenberg 18785a22db89SLars Ellenberg /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 18795a22db89SLars Ellenberg * with w_send_oos, or the sync target will get confused as to 18805a22db89SLars Ellenberg * how much bits 
to resync. We cannot do that always, because for an 18815a22db89SLars Ellenberg * empty resync and protocol < 95, we need to do it here, as we call 18825a22db89SLars Ellenberg * drbd_resync_finished from here in that case. 18835a22db89SLars Ellenberg * We drbd_gen_and_send_sync_uuid here for protocol < 96, 18845a22db89SLars Ellenberg * and from after_state_ch otherwise. */ 188544a4d551SLars Ellenberg if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) 188644a4d551SLars Ellenberg drbd_gen_and_send_sync_uuid(peer_device); 1887b411b363SPhilipp Reisner 188844a4d551SLars Ellenberg if (connection->agreed_pro_version < 95 && device->rs_total == 0) { 1889af85e8e8SLars Ellenberg /* This still has a race (about when exactly the peers 1890af85e8e8SLars Ellenberg * detect connection loss) that can lead to a full sync 1891af85e8e8SLars Ellenberg * on next handshake. In 8.3.9 we fixed this with explicit 1892af85e8e8SLars Ellenberg * resync-finished notifications, but the fix 1893af85e8e8SLars Ellenberg * introduces a protocol change. Sleeping for some 1894af85e8e8SLars Ellenberg * time longer than the ping interval + timeout on the 1895af85e8e8SLars Ellenberg * SyncSource, to give the SyncTarget the chance to 1896af85e8e8SLars Ellenberg * detect connection loss, then waiting for a ping 1897af85e8e8SLars Ellenberg * response (implicit in drbd_resync_finished) reduces 1898af85e8e8SLars Ellenberg * the race considerably, but does not solve it. 
*/ 189944ed167dSPhilipp Reisner if (side == C_SYNC_SOURCE) { 190044ed167dSPhilipp Reisner struct net_conf *nc; 190144ed167dSPhilipp Reisner int timeo; 190244ed167dSPhilipp Reisner 190344ed167dSPhilipp Reisner rcu_read_lock(); 190444a4d551SLars Ellenberg nc = rcu_dereference(connection->net_conf); 190544ed167dSPhilipp Reisner timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; 190644ed167dSPhilipp Reisner rcu_read_unlock(); 190744ed167dSPhilipp Reisner schedule_timeout_interruptible(timeo); 190844ed167dSPhilipp Reisner } 1909b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1910b411b363SPhilipp Reisner } 1911b411b363SPhilipp Reisner 1912b30ab791SAndreas Gruenbacher drbd_rs_controller_reset(device); 1913b30ab791SAndreas Gruenbacher /* ns.conn may already be != device->state.conn, 1914b411b363SPhilipp Reisner * we may have been paused in between, or become paused until 1915b411b363SPhilipp Reisner * the timer triggers. 1916b411b363SPhilipp Reisner * No matter, that is handled in resync_timer_fn() */ 1917b411b363SPhilipp Reisner if (ns.conn == C_SYNC_TARGET) 1918b30ab791SAndreas Gruenbacher mod_timer(&device->resync_timer, jiffies); 1919b411b363SPhilipp Reisner 1920b30ab791SAndreas Gruenbacher drbd_md_sync(device); 1921b411b363SPhilipp Reisner } 1922b30ab791SAndreas Gruenbacher put_ldev(device); 192328bc3b8cSAndreas Gruenbacher out: 1924b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 1925b411b363SPhilipp Reisner } 1926b411b363SPhilipp Reisner 1927e334f550SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) 1928c7a58db4SLars Ellenberg { 1929c7a58db4SLars Ellenberg struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; 1930c7a58db4SLars Ellenberg device->rs_last_bcast = jiffies; 1931c7a58db4SLars Ellenberg 1932c7a58db4SLars Ellenberg if (!get_ldev(device)) 1933c7a58db4SLars Ellenberg return; 1934c7a58db4SLars Ellenberg 1935c7a58db4SLars Ellenberg drbd_bm_write_lazy(device, 0); 19365ab7d2c0SLars 
Ellenberg if (resync_done && is_sync_state(device->state.conn)) 1937c7a58db4SLars Ellenberg drbd_resync_finished(device); 19385ab7d2c0SLars Ellenberg 1939c7a58db4SLars Ellenberg drbd_bcast_event(device, &sib); 1940c7a58db4SLars Ellenberg /* update timestamp, in case it took a while to write out stuff */ 1941c7a58db4SLars Ellenberg device->rs_last_bcast = jiffies; 1942c7a58db4SLars Ellenberg put_ldev(device); 1943c7a58db4SLars Ellenberg } 1944c7a58db4SLars Ellenberg 1945e334f550SLars Ellenberg static void drbd_ldev_destroy(struct drbd_device *device) 1946e334f550SLars Ellenberg { 1947e334f550SLars Ellenberg lc_destroy(device->resync); 1948e334f550SLars Ellenberg device->resync = NULL; 1949e334f550SLars Ellenberg lc_destroy(device->act_log); 1950e334f550SLars Ellenberg device->act_log = NULL; 1951d1b80853SAndreas Gruenbacher 1952d1b80853SAndreas Gruenbacher __acquire(local); 195363a7c8adSLars Ellenberg drbd_backing_dev_free(device, device->ldev); 1954d1b80853SAndreas Gruenbacher device->ldev = NULL; 1955d1b80853SAndreas Gruenbacher __release(local); 1956d1b80853SAndreas Gruenbacher 1957e334f550SLars Ellenberg clear_bit(GOING_DISKLESS, &device->flags); 1958e334f550SLars Ellenberg wake_up(&device->misc_wait); 1959e334f550SLars Ellenberg } 1960e334f550SLars Ellenberg 1961e334f550SLars Ellenberg static void go_diskless(struct drbd_device *device) 1962e334f550SLars Ellenberg { 1963e334f550SLars Ellenberg D_ASSERT(device, device->state.disk == D_FAILED); 1964e334f550SLars Ellenberg /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will 1965e334f550SLars Ellenberg * inc/dec it frequently. Once we are D_DISKLESS, no one will touch 1966e334f550SLars Ellenberg * the protected members anymore, though, so once put_ldev reaches zero 1967e334f550SLars Ellenberg * again, it will be safe to free them. 
*/ 1968e334f550SLars Ellenberg 1969e334f550SLars Ellenberg /* Try to write changed bitmap pages, read errors may have just 1970e334f550SLars Ellenberg * set some bits outside the area covered by the activity log. 1971e334f550SLars Ellenberg * 1972e334f550SLars Ellenberg * If we have an IO error during the bitmap writeout, 1973e334f550SLars Ellenberg * we will want a full sync next time, just in case. 1974e334f550SLars Ellenberg * (Do we want a specific meta data flag for this?) 1975e334f550SLars Ellenberg * 1976e334f550SLars Ellenberg * If that does not make it to stable storage either, 1977e334f550SLars Ellenberg * we cannot do anything about that anymore. 1978e334f550SLars Ellenberg * 1979e334f550SLars Ellenberg * We still need to check if both bitmap and ldev are present, we may 1980e334f550SLars Ellenberg * end up here after a failed attach, before ldev was even assigned. 1981e334f550SLars Ellenberg */ 1982e334f550SLars Ellenberg if (device->bitmap && device->ldev) { 1983e334f550SLars Ellenberg /* An interrupted resync or similar is allowed to recounts bits 1984e334f550SLars Ellenberg * while we detach. 1985e334f550SLars Ellenberg * Any modifications would not be expected anymore, though. 
1986e334f550SLars Ellenberg */ 1987e334f550SLars Ellenberg if (drbd_bitmap_io_from_worker(device, drbd_bm_write, 1988e334f550SLars Ellenberg "detach", BM_LOCKED_TEST_ALLOWED)) { 1989e334f550SLars Ellenberg if (test_bit(WAS_READ_ERROR, &device->flags)) { 1990e334f550SLars Ellenberg drbd_md_set_flag(device, MDF_FULL_SYNC); 1991e334f550SLars Ellenberg drbd_md_sync(device); 1992e334f550SLars Ellenberg } 1993e334f550SLars Ellenberg } 1994e334f550SLars Ellenberg } 1995e334f550SLars Ellenberg 1996e334f550SLars Ellenberg drbd_force_state(device, NS(disk, D_DISKLESS)); 1997e334f550SLars Ellenberg } 1998e334f550SLars Ellenberg 1999ac0acb9eSLars Ellenberg static int do_md_sync(struct drbd_device *device) 2000ac0acb9eSLars Ellenberg { 2001ac0acb9eSLars Ellenberg drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); 2002ac0acb9eSLars Ellenberg drbd_md_sync(device); 2003ac0acb9eSLars Ellenberg return 0; 2004ac0acb9eSLars Ellenberg } 2005ac0acb9eSLars Ellenberg 2006944410e9SLars Ellenberg /* only called from drbd_worker thread, no locking */ 2007944410e9SLars Ellenberg void __update_timing_details( 2008944410e9SLars Ellenberg struct drbd_thread_timing_details *tdp, 2009944410e9SLars Ellenberg unsigned int *cb_nr, 2010944410e9SLars Ellenberg void *cb, 2011944410e9SLars Ellenberg const char *fn, const unsigned int line) 2012944410e9SLars Ellenberg { 2013944410e9SLars Ellenberg unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST; 2014944410e9SLars Ellenberg struct drbd_thread_timing_details *td = tdp + i; 2015944410e9SLars Ellenberg 2016944410e9SLars Ellenberg td->start_jif = jiffies; 2017944410e9SLars Ellenberg td->cb_addr = cb; 2018944410e9SLars Ellenberg td->caller_fn = fn; 2019944410e9SLars Ellenberg td->line = line; 2020944410e9SLars Ellenberg td->cb_nr = *cb_nr; 2021944410e9SLars Ellenberg 2022944410e9SLars Ellenberg i = (i+1) % DRBD_THREAD_DETAILS_HIST; 2023944410e9SLars Ellenberg td = tdp + i; 2024944410e9SLars Ellenberg memset(td, 0, sizeof(*td)); 
2025944410e9SLars Ellenberg 2026944410e9SLars Ellenberg ++(*cb_nr); 2027944410e9SLars Ellenberg } 2028944410e9SLars Ellenberg 2029e334f550SLars Ellenberg static void do_device_work(struct drbd_device *device, const unsigned long todo) 2030e334f550SLars Ellenberg { 2031b47a06d1SAndreas Gruenbacher if (test_bit(MD_SYNC, &todo)) 2032ac0acb9eSLars Ellenberg do_md_sync(device); 2033b47a06d1SAndreas Gruenbacher if (test_bit(RS_DONE, &todo) || 2034b47a06d1SAndreas Gruenbacher test_bit(RS_PROGRESS, &todo)) 2035b47a06d1SAndreas Gruenbacher update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); 2036b47a06d1SAndreas Gruenbacher if (test_bit(GO_DISKLESS, &todo)) 2037e334f550SLars Ellenberg go_diskless(device); 2038b47a06d1SAndreas Gruenbacher if (test_bit(DESTROY_DISK, &todo)) 2039e334f550SLars Ellenberg drbd_ldev_destroy(device); 2040b47a06d1SAndreas Gruenbacher if (test_bit(RS_START, &todo)) 2041ac0acb9eSLars Ellenberg do_start_resync(device); 2042e334f550SLars Ellenberg } 2043e334f550SLars Ellenberg 2044e334f550SLars Ellenberg #define DRBD_DEVICE_WORK_MASK \ 2045e334f550SLars Ellenberg ((1UL << GO_DISKLESS) \ 2046e334f550SLars Ellenberg |(1UL << DESTROY_DISK) \ 2047ac0acb9eSLars Ellenberg |(1UL << MD_SYNC) \ 2048ac0acb9eSLars Ellenberg |(1UL << RS_START) \ 2049e334f550SLars Ellenberg |(1UL << RS_PROGRESS) \ 2050e334f550SLars Ellenberg |(1UL << RS_DONE) \ 2051e334f550SLars Ellenberg ) 2052e334f550SLars Ellenberg 2053e334f550SLars Ellenberg static unsigned long get_work_bits(unsigned long *flags) 2054e334f550SLars Ellenberg { 2055e334f550SLars Ellenberg unsigned long old, new; 2056e334f550SLars Ellenberg do { 2057e334f550SLars Ellenberg old = *flags; 2058e334f550SLars Ellenberg new = old & ~DRBD_DEVICE_WORK_MASK; 2059e334f550SLars Ellenberg } while (cmpxchg(flags, old, new) != old); 2060e334f550SLars Ellenberg return old & DRBD_DEVICE_WORK_MASK; 2061e334f550SLars Ellenberg } 2062e334f550SLars Ellenberg 2063e334f550SLars Ellenberg static void do_unqueued_work(struct 
drbd_connection *connection) 2064c7a58db4SLars Ellenberg { 2065c7a58db4SLars Ellenberg struct drbd_peer_device *peer_device; 2066c7a58db4SLars Ellenberg int vnr; 2067c7a58db4SLars Ellenberg 2068c7a58db4SLars Ellenberg rcu_read_lock(); 2069c7a58db4SLars Ellenberg idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2070c7a58db4SLars Ellenberg struct drbd_device *device = peer_device->device; 2071e334f550SLars Ellenberg unsigned long todo = get_work_bits(&device->flags); 2072e334f550SLars Ellenberg if (!todo) 2073c7a58db4SLars Ellenberg continue; 20745ab7d2c0SLars Ellenberg 2075c7a58db4SLars Ellenberg kref_get(&device->kref); 2076c7a58db4SLars Ellenberg rcu_read_unlock(); 2077e334f550SLars Ellenberg do_device_work(device, todo); 2078c7a58db4SLars Ellenberg kref_put(&device->kref, drbd_destroy_device); 2079c7a58db4SLars Ellenberg rcu_read_lock(); 2080c7a58db4SLars Ellenberg } 2081c7a58db4SLars Ellenberg rcu_read_unlock(); 2082c7a58db4SLars Ellenberg } 2083c7a58db4SLars Ellenberg 2084a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) 20858c0785a5SLars Ellenberg { 20868c0785a5SLars Ellenberg spin_lock_irq(&queue->q_lock); 208715e26f6aSLars Ellenberg list_splice_tail_init(&queue->q, work_list); 20888c0785a5SLars Ellenberg spin_unlock_irq(&queue->q_lock); 20898c0785a5SLars Ellenberg return !list_empty(work_list); 20908c0785a5SLars Ellenberg } 20918c0785a5SLars Ellenberg 2092bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) 2093b6dd1a89SLars Ellenberg { 2094b6dd1a89SLars Ellenberg DEFINE_WAIT(wait); 2095b6dd1a89SLars Ellenberg struct net_conf *nc; 2096b6dd1a89SLars Ellenberg int uncork, cork; 2097b6dd1a89SLars Ellenberg 2098abde9cc6SLars Ellenberg dequeue_work_batch(&connection->sender_work, work_list); 2099b6dd1a89SLars Ellenberg if (!list_empty(work_list)) 2100b6dd1a89SLars Ellenberg return; 2101b6dd1a89SLars Ellenberg 
2102b6dd1a89SLars Ellenberg /* Still nothing to do? 2103b6dd1a89SLars Ellenberg * Maybe we still need to close the current epoch, 2104b6dd1a89SLars Ellenberg * even if no new requests are queued yet. 2105b6dd1a89SLars Ellenberg * 2106b6dd1a89SLars Ellenberg * Also, poke TCP, just in case. 2107b6dd1a89SLars Ellenberg * Then wait for new work (or signal). */ 2108b6dd1a89SLars Ellenberg rcu_read_lock(); 2109b6dd1a89SLars Ellenberg nc = rcu_dereference(connection->net_conf); 2110b6dd1a89SLars Ellenberg uncork = nc ? nc->tcp_cork : 0; 2111b6dd1a89SLars Ellenberg rcu_read_unlock(); 2112b6dd1a89SLars Ellenberg if (uncork) { 2113b6dd1a89SLars Ellenberg mutex_lock(&connection->data.mutex); 2114b6dd1a89SLars Ellenberg if (connection->data.socket) 2115b6dd1a89SLars Ellenberg drbd_tcp_uncork(connection->data.socket); 2116b6dd1a89SLars Ellenberg mutex_unlock(&connection->data.mutex); 2117b6dd1a89SLars Ellenberg } 2118b6dd1a89SLars Ellenberg 2119b6dd1a89SLars Ellenberg for (;;) { 2120b6dd1a89SLars Ellenberg int send_barrier; 2121b6dd1a89SLars Ellenberg prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); 21220500813fSAndreas Gruenbacher spin_lock_irq(&connection->resource->req_lock); 2123b6dd1a89SLars Ellenberg spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 2124bc317a9eSLars Ellenberg if (!list_empty(&connection->sender_work.q)) 21254dd726f0SLars Ellenberg list_splice_tail_init(&connection->sender_work.q, work_list); 2126b6dd1a89SLars Ellenberg spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 2127b6dd1a89SLars Ellenberg if (!list_empty(work_list) || signal_pending(current)) { 21280500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 2129b6dd1a89SLars Ellenberg break; 2130b6dd1a89SLars Ellenberg } 2131f9c78128SLars Ellenberg 2132f9c78128SLars Ellenberg /* We found nothing new to do, no to-be-communicated request, 2133f9c78128SLars Ellenberg * no other work item. 
We may still need to close the last 2134f9c78128SLars Ellenberg * epoch. Next incoming request epoch will be connection -> 2135f9c78128SLars Ellenberg * current transfer log epoch number. If that is different 2136f9c78128SLars Ellenberg * from the epoch of the last request we communicated, it is 2137f9c78128SLars Ellenberg * safe to send the epoch separating barrier now. 2138f9c78128SLars Ellenberg */ 2139f9c78128SLars Ellenberg send_barrier = 2140f9c78128SLars Ellenberg atomic_read(&connection->current_tle_nr) != 2141f9c78128SLars Ellenberg connection->send.current_epoch_nr; 21420500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 2143f9c78128SLars Ellenberg 2144f9c78128SLars Ellenberg if (send_barrier) 2145f9c78128SLars Ellenberg maybe_send_barrier(connection, 2146f9c78128SLars Ellenberg connection->send.current_epoch_nr + 1); 21475ab7d2c0SLars Ellenberg 2148e334f550SLars Ellenberg if (test_bit(DEVICE_WORK_PENDING, &connection->flags)) 21495ab7d2c0SLars Ellenberg break; 21505ab7d2c0SLars Ellenberg 2151a80ca1aeSLars Ellenberg /* drbd_send() may have called flush_signals() */ 2152a80ca1aeSLars Ellenberg if (get_t_state(&connection->worker) != RUNNING) 2153a80ca1aeSLars Ellenberg break; 21545ab7d2c0SLars Ellenberg 2155b6dd1a89SLars Ellenberg schedule(); 2156b6dd1a89SLars Ellenberg /* may be woken up for other things but new work, too, 2157b6dd1a89SLars Ellenberg * e.g. if the current epoch got closed. 2158b6dd1a89SLars Ellenberg * In which case we send the barrier above. */ 2159b6dd1a89SLars Ellenberg } 2160b6dd1a89SLars Ellenberg finish_wait(&connection->sender_work.q_wait, &wait); 2161b6dd1a89SLars Ellenberg 2162b6dd1a89SLars Ellenberg /* someone may have changed the config while we have been waiting above. */ 2163b6dd1a89SLars Ellenberg rcu_read_lock(); 2164b6dd1a89SLars Ellenberg nc = rcu_dereference(connection->net_conf); 2165b6dd1a89SLars Ellenberg cork = nc ? 
nc->tcp_cork : 0; 2166b6dd1a89SLars Ellenberg rcu_read_unlock(); 2167b6dd1a89SLars Ellenberg mutex_lock(&connection->data.mutex); 2168b6dd1a89SLars Ellenberg if (connection->data.socket) { 2169b6dd1a89SLars Ellenberg if (cork) 2170b6dd1a89SLars Ellenberg drbd_tcp_cork(connection->data.socket); 2171b6dd1a89SLars Ellenberg else if (!uncork) 2172b6dd1a89SLars Ellenberg drbd_tcp_uncork(connection->data.socket); 2173b6dd1a89SLars Ellenberg } 2174b6dd1a89SLars Ellenberg mutex_unlock(&connection->data.mutex); 2175b6dd1a89SLars Ellenberg } 2176b6dd1a89SLars Ellenberg 2177b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi) 2178b411b363SPhilipp Reisner { 2179bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 21806db7e50aSAndreas Gruenbacher struct drbd_work *w = NULL; 2181c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2182b411b363SPhilipp Reisner LIST_HEAD(work_list); 21838c0785a5SLars Ellenberg int vnr; 2184b411b363SPhilipp Reisner 2185e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 218680822284SPhilipp Reisner drbd_thread_current_set_cpu(thi); 2187b411b363SPhilipp Reisner 2188944410e9SLars Ellenberg if (list_empty(&work_list)) { 2189944410e9SLars Ellenberg update_worker_timing_details(connection, wait_for_work); 2190bde89a9eSAndreas Gruenbacher wait_for_work(connection, &work_list); 2191944410e9SLars Ellenberg } 2192b411b363SPhilipp Reisner 2193944410e9SLars Ellenberg if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { 2194944410e9SLars Ellenberg update_worker_timing_details(connection, do_unqueued_work); 2195e334f550SLars Ellenberg do_unqueued_work(connection); 2196944410e9SLars Ellenberg } 21975ab7d2c0SLars Ellenberg 21988c0785a5SLars Ellenberg if (signal_pending(current)) { 2199b411b363SPhilipp Reisner flush_signals(current); 220019393e10SPhilipp Reisner if (get_t_state(thi) == RUNNING) { 22011ec861ebSAndreas Gruenbacher drbd_warn(connection, "Worker got an unexpected 
signal\n"); 2202b411b363SPhilipp Reisner continue; 220319393e10SPhilipp Reisner } 2204b411b363SPhilipp Reisner break; 2205b411b363SPhilipp Reisner } 2206b411b363SPhilipp Reisner 2207e77a0a5cSAndreas Gruenbacher if (get_t_state(thi) != RUNNING) 2208b411b363SPhilipp Reisner break; 2209b411b363SPhilipp Reisner 2210729e8b87SLars Ellenberg if (!list_empty(&work_list)) { 22116db7e50aSAndreas Gruenbacher w = list_first_entry(&work_list, struct drbd_work, list); 22126db7e50aSAndreas Gruenbacher list_del_init(&w->list); 2213944410e9SLars Ellenberg update_worker_timing_details(connection, w->cb); 22146db7e50aSAndreas Gruenbacher if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) 22158c0785a5SLars Ellenberg continue; 2216bde89a9eSAndreas Gruenbacher if (connection->cstate >= C_WF_REPORT_PARAMS) 2217bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 2218b411b363SPhilipp Reisner } 2219b411b363SPhilipp Reisner } 2220b411b363SPhilipp Reisner 22218c0785a5SLars Ellenberg do { 2222944410e9SLars Ellenberg if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { 2223944410e9SLars Ellenberg update_worker_timing_details(connection, do_unqueued_work); 2224e334f550SLars Ellenberg do_unqueued_work(connection); 2225944410e9SLars Ellenberg } 2226729e8b87SLars Ellenberg if (!list_empty(&work_list)) { 22276db7e50aSAndreas Gruenbacher w = list_first_entry(&work_list, struct drbd_work, list); 22286db7e50aSAndreas Gruenbacher list_del_init(&w->list); 2229944410e9SLars Ellenberg update_worker_timing_details(connection, w->cb); 22306db7e50aSAndreas Gruenbacher w->cb(w, 1); 2231729e8b87SLars Ellenberg } else 2232bde89a9eSAndreas Gruenbacher dequeue_work_batch(&connection->sender_work, &work_list); 2233e334f550SLars Ellenberg } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags)); 2234b411b363SPhilipp Reisner 2235c141ebdaSPhilipp Reisner rcu_read_lock(); 2236c06ece6bSAndreas Gruenbacher 
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2237c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 22380b0ba1efSAndreas Gruenbacher D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE); 2239b30ab791SAndreas Gruenbacher kref_get(&device->kref); 2240c141ebdaSPhilipp Reisner rcu_read_unlock(); 2241b30ab791SAndreas Gruenbacher drbd_device_cleanup(device); 224205a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 2243c141ebdaSPhilipp Reisner rcu_read_lock(); 22440e29d163SPhilipp Reisner } 2245c141ebdaSPhilipp Reisner rcu_read_unlock(); 2246b411b363SPhilipp Reisner 2247b411b363SPhilipp Reisner return 0; 2248b411b363SPhilipp Reisner } 2249