1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_worker.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner 24b411b363SPhilipp Reisner */ 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner #include <linux/drbd.h> 28174cd4b1SIngo Molnar #include <linux/sched/signal.h> 29b411b363SPhilipp Reisner #include <linux/wait.h> 30b411b363SPhilipp Reisner #include <linux/mm.h> 31b411b363SPhilipp Reisner #include <linux/memcontrol.h> 32b411b363SPhilipp Reisner #include <linux/mm_inline.h> 33b411b363SPhilipp Reisner #include <linux/slab.h> 34b411b363SPhilipp Reisner #include <linux/random.h> 35b411b363SPhilipp Reisner #include <linux/string.h> 36b411b363SPhilipp Reisner #include <linux/scatterlist.h> 37b411b363SPhilipp Reisner 38b411b363SPhilipp Reisner #include "drbd_int.h" 39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 40b411b363SPhilipp Reisner #include "drbd_req.h" 41b411b363SPhilipp Reisner 42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int); 43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int); 44b411b363SPhilipp Reisner 45c5a91619SAndreas Gruenbacher /* endio handlers: 46ed15b795SAndreas Gruenbacher * drbd_md_endio (defined here) 47fcefa62eSAndreas Gruenbacher * drbd_request_endio (defined here) 48fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio (defined here) 49ed15b795SAndreas Gruenbacher * drbd_bm_endio (defined in drbd_bitmap.c) 50c5a91619SAndreas Gruenbacher * 51b411b363SPhilipp Reisner * For all these callbacks, note the following: 52b411b363SPhilipp Reisner * The callbacks will be called in irq context by the IDE drivers, 53b411b363SPhilipp Reisner * and in Softirqs/Tasklets/BH context by the SCSI drivers. 
54b411b363SPhilipp Reisner * Try to get the locking right :) 55b411b363SPhilipp Reisner * 56b411b363SPhilipp Reisner */ 57b411b363SPhilipp Reisner 58b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO 59b411b363SPhilipp Reisner * submitted by drbd_md_sync_page_io() 60b411b363SPhilipp Reisner */ 614246a0b6SChristoph Hellwig void drbd_md_endio(struct bio *bio) 62b411b363SPhilipp Reisner { 63b30ab791SAndreas Gruenbacher struct drbd_device *device; 64b411b363SPhilipp Reisner 65e37d2438SLars Ellenberg device = bio->bi_private; 664e4cbee9SChristoph Hellwig device->md_io.error = blk_status_to_errno(bio->bi_status); 67b411b363SPhilipp Reisner 687c752ed3SLars Ellenberg /* special case: drbd_md_read() during drbd_adm_attach() */ 697c752ed3SLars Ellenberg if (device->ldev) 707c752ed3SLars Ellenberg put_ldev(device); 717c752ed3SLars Ellenberg bio_put(bio); 727c752ed3SLars Ellenberg 730cfac5ddSPhilipp Reisner /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 740cfac5ddSPhilipp Reisner * to timeout on the lower level device, and eventually detach from it. 750cfac5ddSPhilipp Reisner * If this io completion runs after that timeout expired, this 760cfac5ddSPhilipp Reisner * drbd_md_put_buffer() may allow us to finally try and re-attach. 770cfac5ddSPhilipp Reisner * During normal operation, this only puts that extra reference 780cfac5ddSPhilipp Reisner * down to 1 again. 790cfac5ddSPhilipp Reisner * Make sure we first drop the reference, and only then signal 800cfac5ddSPhilipp Reisner * completion, or we may (in drbd_al_read_log()) cycle so fast into the 810cfac5ddSPhilipp Reisner * next drbd_md_sync_page_io(), that we trigger the 82b30ab791SAndreas Gruenbacher * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 
830cfac5ddSPhilipp Reisner */ 84b30ab791SAndreas Gruenbacher drbd_md_put_buffer(device); 85e37d2438SLars Ellenberg device->md_io.done = 1; 86b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 87b411b363SPhilipp Reisner } 88b411b363SPhilipp Reisner 89b411b363SPhilipp Reisner /* reads on behalf of the partner, 90b411b363SPhilipp Reisner * "submitted" by the receiver 91b411b363SPhilipp Reisner */ 92a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) 93b411b363SPhilipp Reisner { 94b411b363SPhilipp Reisner unsigned long flags = 0; 956780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 966780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 97b411b363SPhilipp Reisner 980500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 99b30ab791SAndreas Gruenbacher device->read_cnt += peer_req->i.size >> 9; 100a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 101b30ab791SAndreas Gruenbacher if (list_empty(&device->read_ee)) 102b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 103db830c46SAndreas Gruenbacher if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) 104b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_READ_ERROR); 1050500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 106b411b363SPhilipp Reisner 1076780139cSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); 108b30ab791SAndreas Gruenbacher put_ldev(device); 109b411b363SPhilipp Reisner } 110b411b363SPhilipp Reisner 111b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes, 11245bb912bSLars Ellenberg * "submitted" by the receiver, final stage. 
*/ 113a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) 114b411b363SPhilipp Reisner { 115b411b363SPhilipp Reisner unsigned long flags = 0; 1166780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1176780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 118668700b4SPhilipp Reisner struct drbd_connection *connection = peer_device->connection; 119181286adSLars Ellenberg struct drbd_interval i; 120b411b363SPhilipp Reisner int do_wake; 121579b57edSAndreas Gruenbacher u64 block_id; 122b411b363SPhilipp Reisner int do_al_complete_io; 123b411b363SPhilipp Reisner 124db830c46SAndreas Gruenbacher /* after we moved peer_req to done_ee, 125b411b363SPhilipp Reisner * we may no longer access it, 126b411b363SPhilipp Reisner * it may be freed/reused already! 127b411b363SPhilipp Reisner * (as soon as we release the req_lock) */ 128181286adSLars Ellenberg i = peer_req->i; 129db830c46SAndreas Gruenbacher do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 130db830c46SAndreas Gruenbacher block_id = peer_req->block_id; 13121ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 132b411b363SPhilipp Reisner 133e1fbc4caSLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) { 134e1fbc4caSLars Ellenberg /* In protocol != C, we usually do not send write acks. 135e1fbc4caSLars Ellenberg * In case of a write error, send the neg ack anyways. 
*/ 136e1fbc4caSLars Ellenberg if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) 137e1fbc4caSLars Ellenberg inc_unacked(device); 138e1fbc4caSLars Ellenberg drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 139e1fbc4caSLars Ellenberg } 140e1fbc4caSLars Ellenberg 1410500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 142b30ab791SAndreas Gruenbacher device->writ_cnt += peer_req->i.size >> 9; 143a8cd15baSAndreas Gruenbacher list_move_tail(&peer_req->w.list, &device->done_ee); 144b411b363SPhilipp Reisner 145bb3bfe96SAndreas Gruenbacher /* 1465e472264SAndreas Gruenbacher * Do not remove from the write_requests tree here: we did not send the 147bb3bfe96SAndreas Gruenbacher * Ack yet and did not wake possibly waiting conflicting requests. 148bb3bfe96SAndreas Gruenbacher * Removed from the tree from "drbd_process_done_ee" within the 14984b8c06bSAndreas Gruenbacher * appropriate dw.cb (e_end_block/e_end_resync_block) or from 150bb3bfe96SAndreas Gruenbacher * _drbd_clear_done_ee. 151bb3bfe96SAndreas Gruenbacher */ 152b411b363SPhilipp Reisner 153b30ab791SAndreas Gruenbacher do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 154b411b363SPhilipp Reisner 1559305455aSBart Van Assche /* FIXME do we want to detach for failed REQ_OP_DISCARD? 
156a0fb3c47SLars Ellenberg * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 157a0fb3c47SLars Ellenberg if (peer_req->flags & EE_WAS_ERROR) 158b30ab791SAndreas Gruenbacher __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 159668700b4SPhilipp Reisner 160668700b4SPhilipp Reisner if (connection->cstate >= C_WF_REPORT_PARAMS) { 161668700b4SPhilipp Reisner kref_get(&device->kref); /* put is in drbd_send_acks_wf() */ 162668700b4SPhilipp Reisner if (!queue_work(connection->ack_sender, &peer_device->send_acks_work)) 163668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 164668700b4SPhilipp Reisner } 1650500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 166b411b363SPhilipp Reisner 167579b57edSAndreas Gruenbacher if (block_id == ID_SYNCER) 168b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, i.sector); 169b411b363SPhilipp Reisner 170b411b363SPhilipp Reisner if (do_wake) 171b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 172b411b363SPhilipp Reisner 173b411b363SPhilipp Reisner if (do_al_complete_io) 174b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &i); 175b411b363SPhilipp Reisner 176b30ab791SAndreas Gruenbacher put_ldev(device); 17745bb912bSLars Ellenberg } 178b411b363SPhilipp Reisner 17945bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes, 18045bb912bSLars Ellenberg * "submitted" by the receiver. 
18145bb912bSLars Ellenberg */ 1824246a0b6SChristoph Hellwig void drbd_peer_request_endio(struct bio *bio) 18345bb912bSLars Ellenberg { 184db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req = bio->bi_private; 185a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_req->peer_device->device; 1867e5fec31SFabian Frederick bool is_write = bio_data_dir(bio) == WRITE; 18745c21793SChristoph Hellwig bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || 18845c21793SChristoph Hellwig bio_op(bio) == REQ_OP_DISCARD; 18945bb912bSLars Ellenberg 1904e4cbee9SChristoph Hellwig if (bio->bi_status && __ratelimit(&drbd_ratelimit_state)) 191d0180171SAndreas Gruenbacher drbd_warn(device, "%s: error=%d s=%llus\n", 192a0fb3c47SLars Ellenberg is_write ? (is_discard ? "discard" : "write") 1934e4cbee9SChristoph Hellwig : "read", bio->bi_status, 194db830c46SAndreas Gruenbacher (unsigned long long)peer_req->i.sector); 19545bb912bSLars Ellenberg 1964e4cbee9SChristoph Hellwig if (bio->bi_status) 197db830c46SAndreas Gruenbacher set_bit(__EE_WAS_ERROR, &peer_req->flags); 19845bb912bSLars Ellenberg 19945bb912bSLars Ellenberg bio_put(bio); /* no need for the bio anymore */ 200db830c46SAndreas Gruenbacher if (atomic_dec_and_test(&peer_req->pending_bios)) { 20145bb912bSLars Ellenberg if (is_write) 202db830c46SAndreas Gruenbacher drbd_endio_write_sec_final(peer_req); 20345bb912bSLars Ellenberg else 204db830c46SAndreas Gruenbacher drbd_endio_read_sec_final(peer_req); 20545bb912bSLars Ellenberg } 206b411b363SPhilipp Reisner } 207b411b363SPhilipp Reisner 2081ffa7bfaSBaoyou Xie static void 2091ffa7bfaSBaoyou Xie drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) 210142207f7SLars Ellenberg { 211142207f7SLars Ellenberg panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n", 212142207f7SLars Ellenberg device->minor, device->resource->name, device->vnr); 213142207f7SLars Ellenberg } 214142207f7SLars 
Ellenberg 215b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request 216b411b363SPhilipp Reisner */ 2174246a0b6SChristoph Hellwig void drbd_request_endio(struct bio *bio) 218b411b363SPhilipp Reisner { 219a115413dSLars Ellenberg unsigned long flags; 220b411b363SPhilipp Reisner struct drbd_request *req = bio->bi_private; 22184b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 222a115413dSLars Ellenberg struct bio_and_error m; 223b411b363SPhilipp Reisner enum drbd_req_event what; 2241b6dd252SPhilipp Reisner 2251b6dd252SPhilipp Reisner /* If this request was aborted locally before, 2261b6dd252SPhilipp Reisner * but now was completed "successfully", 2271b6dd252SPhilipp Reisner * chances are that this caused arbitrary data corruption. 2281b6dd252SPhilipp Reisner * 2291b6dd252SPhilipp Reisner * "aborting" requests, or force-detaching the disk, is intended for 2301b6dd252SPhilipp Reisner * completely blocked/hung local backing devices which do no longer 2311b6dd252SPhilipp Reisner * complete requests at all, not even do error completions. In this 2321b6dd252SPhilipp Reisner * situation, usually a hard-reset and failover is the only way out. 2331b6dd252SPhilipp Reisner * 2341b6dd252SPhilipp Reisner * By "aborting", basically faking a local error-completion, 2351b6dd252SPhilipp Reisner * we allow for a more graceful swichover by cleanly migrating services. 2361b6dd252SPhilipp Reisner * Still the affected node has to be rebooted "soon". 2371b6dd252SPhilipp Reisner * 2381b6dd252SPhilipp Reisner * By completing these requests, we allow the upper layers to re-use 2391b6dd252SPhilipp Reisner * the associated data pages. 
2401b6dd252SPhilipp Reisner * 2411b6dd252SPhilipp Reisner * If later the local backing device "recovers", and now DMAs some data 2421b6dd252SPhilipp Reisner * from disk into the original request pages, in the best case it will 2431b6dd252SPhilipp Reisner * just put random data into unused pages; but typically it will corrupt 2441b6dd252SPhilipp Reisner * meanwhile completely unrelated data, causing all sorts of damage. 2451b6dd252SPhilipp Reisner * 2461b6dd252SPhilipp Reisner * Which means delayed successful completion, 2471b6dd252SPhilipp Reisner * especially for READ requests, 2481b6dd252SPhilipp Reisner * is a reason to panic(). 2491b6dd252SPhilipp Reisner * 2501b6dd252SPhilipp Reisner * We assume that a delayed *error* completion is OK, 2511b6dd252SPhilipp Reisner * though we still will complain noisily about it. 2521b6dd252SPhilipp Reisner */ 2531b6dd252SPhilipp Reisner if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { 2541b6dd252SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 255d0180171SAndreas Gruenbacher drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 2561b6dd252SPhilipp Reisner 2574e4cbee9SChristoph Hellwig if (!bio->bi_status) 258142207f7SLars Ellenberg drbd_panic_after_delayed_completion_of_aborted_request(device); 2591b6dd252SPhilipp Reisner } 2601b6dd252SPhilipp Reisner 261b411b363SPhilipp Reisner /* to avoid recursion in __req_mod */ 2624e4cbee9SChristoph Hellwig if (unlikely(bio->bi_status)) { 26370246286SChristoph Hellwig switch (bio_op(bio)) { 26445c21793SChristoph Hellwig case REQ_OP_WRITE_ZEROES: 26570246286SChristoph Hellwig case REQ_OP_DISCARD: 2664e4cbee9SChristoph Hellwig if (bio->bi_status == BLK_STS_NOTSUPP) 26770246286SChristoph Hellwig what = DISCARD_COMPLETED_NOTSUPP; 2682f632aebSLars Ellenberg else 26970246286SChristoph Hellwig what = DISCARD_COMPLETED_WITH_ERROR; 27070246286SChristoph Hellwig break; 27170246286SChristoph Hellwig case REQ_OP_READ: 2721eff9d32SJens Axboe 
if (bio->bi_opf & REQ_RAHEAD) 27370246286SChristoph Hellwig what = READ_AHEAD_COMPLETED_WITH_ERROR; 27470246286SChristoph Hellwig else 27570246286SChristoph Hellwig what = READ_COMPLETED_WITH_ERROR; 27670246286SChristoph Hellwig break; 27770246286SChristoph Hellwig default: 27870246286SChristoph Hellwig what = WRITE_COMPLETED_WITH_ERROR; 27970246286SChristoph Hellwig break; 28070246286SChristoph Hellwig } 28170246286SChristoph Hellwig } else { 2828554df1cSAndreas Gruenbacher what = COMPLETED_OK; 28370246286SChristoph Hellwig } 284b411b363SPhilipp Reisner 2854e4cbee9SChristoph Hellwig req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); 28664dafbc9SLars Ellenberg bio_put(bio); 287b411b363SPhilipp Reisner 288a115413dSLars Ellenberg /* not req_mod(), we need irqsave here! */ 2890500813fSAndreas Gruenbacher spin_lock_irqsave(&device->resource->req_lock, flags); 290a115413dSLars Ellenberg __req_mod(req, what, &m); 2910500813fSAndreas Gruenbacher spin_unlock_irqrestore(&device->resource->req_lock, flags); 292b30ab791SAndreas Gruenbacher put_ldev(device); 293a115413dSLars Ellenberg 294a115413dSLars Ellenberg if (m.bio) 295b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 296b411b363SPhilipp Reisner } 297b411b363SPhilipp Reisner 2983d0e6375SKees Cook void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest) 29945bb912bSLars Ellenberg { 3003d0e6375SKees Cook SHASH_DESC_ON_STACK(desc, tfm); 301db830c46SAndreas Gruenbacher struct page *page = peer_req->pages; 30245bb912bSLars Ellenberg struct page *tmp; 30345bb912bSLars Ellenberg unsigned len; 3043d0e6375SKees Cook void *src; 30545bb912bSLars Ellenberg 3063d0e6375SKees Cook desc->tfm = tfm; 3073d0e6375SKees Cook desc->flags = 0; 30845bb912bSLars Ellenberg 3093d0e6375SKees Cook crypto_shash_init(desc); 31045bb912bSLars Ellenberg 3113d0e6375SKees Cook src = kmap_atomic(page); 31245bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) { 31345bb912bSLars Ellenberg 
/* all but the last page will be fully used */ 3143d0e6375SKees Cook crypto_shash_update(desc, src, PAGE_SIZE); 3153d0e6375SKees Cook kunmap_atomic(src); 31645bb912bSLars Ellenberg page = tmp; 3173d0e6375SKees Cook src = kmap_atomic(page); 31845bb912bSLars Ellenberg } 31945bb912bSLars Ellenberg /* and now the last, possibly only partially used page */ 320db830c46SAndreas Gruenbacher len = peer_req->i.size & (PAGE_SIZE - 1); 3213d0e6375SKees Cook crypto_shash_update(desc, src, len ?: PAGE_SIZE); 3223d0e6375SKees Cook kunmap_atomic(src); 3233d0e6375SKees Cook 3243d0e6375SKees Cook crypto_shash_final(desc, digest); 3253d0e6375SKees Cook shash_desc_zero(desc); 32645bb912bSLars Ellenberg } 32745bb912bSLars Ellenberg 3283d0e6375SKees Cook void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest) 329b411b363SPhilipp Reisner { 3303d0e6375SKees Cook SHASH_DESC_ON_STACK(desc, tfm); 3317988613bSKent Overstreet struct bio_vec bvec; 3327988613bSKent Overstreet struct bvec_iter iter; 333b411b363SPhilipp Reisner 3343d0e6375SKees Cook desc->tfm = tfm; 3353d0e6375SKees Cook desc->flags = 0; 336b411b363SPhilipp Reisner 3373d0e6375SKees Cook crypto_shash_init(desc); 338b411b363SPhilipp Reisner 3397988613bSKent Overstreet bio_for_each_segment(bvec, bio, iter) { 3403d0e6375SKees Cook u8 *src; 3413d0e6375SKees Cook 3423d0e6375SKees Cook src = kmap_atomic(bvec.bv_page); 3433d0e6375SKees Cook crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len); 3443d0e6375SKees Cook kunmap_atomic(src); 3453d0e6375SKees Cook 3469104d31aSLars Ellenberg /* REQ_OP_WRITE_SAME has only one segment, 3479104d31aSLars Ellenberg * checksum the payload only once. 
*/ 3489104d31aSLars Ellenberg if (bio_op(bio) == REQ_OP_WRITE_SAME) 3499104d31aSLars Ellenberg break; 350b411b363SPhilipp Reisner } 3513d0e6375SKees Cook crypto_shash_final(desc, digest); 3523d0e6375SKees Cook shash_desc_zero(desc); 353b411b363SPhilipp Reisner } 354b411b363SPhilipp Reisner 3559676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */ 35699920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel) 357b411b363SPhilipp Reisner { 358a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 3596780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 3606780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 361b411b363SPhilipp Reisner int digest_size; 362b411b363SPhilipp Reisner void *digest; 36399920dc5SAndreas Gruenbacher int err = 0; 364b411b363SPhilipp Reisner 36553ea4331SLars Ellenberg if (unlikely(cancel)) 36653ea4331SLars Ellenberg goto out; 367b411b363SPhilipp Reisner 3689676c760SLars Ellenberg if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) 36953ea4331SLars Ellenberg goto out; 37053ea4331SLars Ellenberg 3713d0e6375SKees Cook digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); 372b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 373b411b363SPhilipp Reisner if (digest) { 374db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 375db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 3766780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 3779676c760SLars Ellenberg /* Free peer_req and pages before send. 
37853ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 37953ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 38053ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 381c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 382b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 383db830c46SAndreas Gruenbacher peer_req = NULL; 384b30ab791SAndreas Gruenbacher inc_rs_pending(device); 3856780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, 38653ea4331SLars Ellenberg digest, digest_size, 387b411b363SPhilipp Reisner P_CSUM_RS_REQUEST); 388b411b363SPhilipp Reisner kfree(digest); 389b411b363SPhilipp Reisner } else { 390d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc() of digest failed.\n"); 39199920dc5SAndreas Gruenbacher err = -ENOMEM; 392b411b363SPhilipp Reisner } 393b411b363SPhilipp Reisner 39453ea4331SLars Ellenberg out: 395db830c46SAndreas Gruenbacher if (peer_req) 396b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 397b411b363SPhilipp Reisner 39899920dc5SAndreas Gruenbacher if (unlikely(err)) 399d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); 40099920dc5SAndreas Gruenbacher return err; 401b411b363SPhilipp Reisner } 402b411b363SPhilipp Reisner 403b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 404b411b363SPhilipp Reisner 40569a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) 406b411b363SPhilipp Reisner { 40769a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 408db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 409b411b363SPhilipp Reisner 410b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 41180a40e43SLars Ellenberg return -EIO; 412b411b363SPhilipp Reisner 413b411b363SPhilipp Reisner /* GFP_TRY, because if 
there is no memory available right now, this may 414b411b363SPhilipp Reisner * be rescheduled for later. It is "only" background resync, after all. */ 41569a22773SAndreas Gruenbacher peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 4169104d31aSLars Ellenberg size, size, GFP_TRY); 417db830c46SAndreas Gruenbacher if (!peer_req) 41880a40e43SLars Ellenberg goto defer; 419b411b363SPhilipp Reisner 420a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_send_csum; 4210500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 422b9ed7080SLars Ellenberg list_add_tail(&peer_req->w.list, &device->read_ee); 4230500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 424b411b363SPhilipp Reisner 425b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 426bb3cc85eSMike Christie if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, 427bb3cc85eSMike Christie DRBD_FAULT_RS_RD) == 0) 42880a40e43SLars Ellenberg return 0; 42945bb912bSLars Ellenberg 43010f6d992SLars Ellenberg /* If it failed because of ENOMEM, retry should help. If it failed 43110f6d992SLars Ellenberg * because bio_add_page failed (probably broken lower level driver), 43210f6d992SLars Ellenberg * retry may or may not help. 43310f6d992SLars Ellenberg * If it does not, you may need to force disconnect. 
*/ 4340500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 435a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 4360500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 43722cc37a9SLars Ellenberg 438b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 43980a40e43SLars Ellenberg defer: 440b30ab791SAndreas Gruenbacher put_ldev(device); 44180a40e43SLars Ellenberg return -EAGAIN; 442b411b363SPhilipp Reisner } 443b411b363SPhilipp Reisner 44499920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel) 445794abb75SPhilipp Reisner { 44684b8c06bSAndreas Gruenbacher struct drbd_device *device = 44784b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, resync_work); 44884b8c06bSAndreas Gruenbacher 449b30ab791SAndreas Gruenbacher switch (device->state.conn) { 450794abb75SPhilipp Reisner case C_VERIFY_S: 451d448a2e1SAndreas Gruenbacher make_ov_request(device, cancel); 452794abb75SPhilipp Reisner break; 453794abb75SPhilipp Reisner case C_SYNC_TARGET: 454d448a2e1SAndreas Gruenbacher make_resync_request(device, cancel); 455794abb75SPhilipp Reisner break; 456794abb75SPhilipp Reisner } 457794abb75SPhilipp Reisner 45899920dc5SAndreas Gruenbacher return 0; 459794abb75SPhilipp Reisner } 460794abb75SPhilipp Reisner 4612bccef39SKees Cook void resync_timer_fn(struct timer_list *t) 462b411b363SPhilipp Reisner { 4632bccef39SKees Cook struct drbd_device *device = from_timer(device, t, resync_timer); 464b411b363SPhilipp Reisner 46515e26f6aSLars Ellenberg drbd_queue_work_if_unqueued( 46615e26f6aSLars Ellenberg &first_peer_device(device)->connection->sender_work, 46784b8c06bSAndreas Gruenbacher &device->resync_work); 468b411b363SPhilipp Reisner } 469b411b363SPhilipp Reisner 470778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value) 471778f271dSPhilipp Reisner { 472778f271dSPhilipp Reisner int i; 473778f271dSPhilipp Reisner 474778f271dSPhilipp Reisner for (i = 0; i < fb->size; 
i++) 475f10f2623SPhilipp Reisner fb->values[i] = value; 476778f271dSPhilipp Reisner } 477778f271dSPhilipp Reisner 478778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value) 479778f271dSPhilipp Reisner { 480778f271dSPhilipp Reisner int ov; 481778f271dSPhilipp Reisner 482778f271dSPhilipp Reisner ov = fb->values[fb->head_index]; 483778f271dSPhilipp Reisner fb->values[fb->head_index++] = value; 484778f271dSPhilipp Reisner 485778f271dSPhilipp Reisner if (fb->head_index >= fb->size) 486778f271dSPhilipp Reisner fb->head_index = 0; 487778f271dSPhilipp Reisner 488778f271dSPhilipp Reisner return ov; 489778f271dSPhilipp Reisner } 490778f271dSPhilipp Reisner 491778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value) 492778f271dSPhilipp Reisner { 493778f271dSPhilipp Reisner int i; 494778f271dSPhilipp Reisner 495778f271dSPhilipp Reisner for (i = 0; i < fb->size; i++) 496778f271dSPhilipp Reisner fb->values[i] += value; 497778f271dSPhilipp Reisner } 498778f271dSPhilipp Reisner 4999958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size) 5009958c857SPhilipp Reisner { 5019958c857SPhilipp Reisner struct fifo_buffer *fb; 5029958c857SPhilipp Reisner 5038747d30aSLars Ellenberg fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); 5049958c857SPhilipp Reisner if (!fb) 5059958c857SPhilipp Reisner return NULL; 5069958c857SPhilipp Reisner 5079958c857SPhilipp Reisner fb->head_index = 0; 5089958c857SPhilipp Reisner fb->size = fifo_size; 5099958c857SPhilipp Reisner fb->total = 0; 5109958c857SPhilipp Reisner 5119958c857SPhilipp Reisner return fb; 5129958c857SPhilipp Reisner } 5139958c857SPhilipp Reisner 5140e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 515778f271dSPhilipp Reisner { 516daeda1ccSPhilipp Reisner struct disk_conf *dc; 5177f34f614SLars Ellenberg unsigned int want; /* The number of sectors we want in-flight */ 518778f271dSPhilipp Reisner 
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in-flight */
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	/* disk_conf and the plan fifo are RCU-protected; the caller
	 * (drbd_rs_number_requests) holds rcu_read_lock() around us. */
	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	/* How far are we off the target, accounting for what is already
	 * in flight and what previous steps have planned? */
	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	/* Never exceed the configured hard cap (c_max_rate) per step. */
	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

/* Decide how many resync/verify requests to generate in this turn:
 * either from the dynamic controller above (when a plan fifo is
 * configured) or derived from the static resync_rate setting.
 * Also refreshes device->c_sync_rate for status reporting. */
static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in; /* Number of sectors that came in since the last turn */
	int number, mxb;

	/* Consume (and reset) the sectors acknowledged since last turn. */
	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte).
	 */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

/* Generate up to drbd_rs_number_requests() resync data requests by
 * scanning the out-of-sync bitmap from device->bm_resync_fo onward.
 * Adjacent dirty bits are merged into bigger, aligned requests.
 * Re-arms the resync timer (via the "requeue" path) unless the bitmap
 * end was reached.  Returns 0 on success/requeue, a negative error if
 * sending failed hard. */
static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			/* No more dirty bits: remember "end" and stop without
			 * re-arming the timer. */
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			/* Sector busy in the resync LRU: retry from here on
			 * the next timer tick. */
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			/* Bit was cleared in the meantime: nothing to do. */
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				/* Undo the merge bookkeeping and retry the
				 * whole request on the next tick. */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	/* i is in units of BM_BLOCK_SIZE, rs_in_flight in 512-byte sectors. */
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

/* Generate online-verify requests starting at device->ov_position,
 * honoring an optional stop sector.  Re-arms the resync timer unless
 * the stop sector was reached after at least one request. */
static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out.
		 */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		/* Sector busy in the resync LRU: remember the position and
		 * retry from here on the next timer tick. */
		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	/* i is in units of BM_BLOCK_SIZE, rs_in_flight in 512-byte sectors. */
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

/* Worker callback run when an online-verify finished: print the
 * out-of-sync summary, wrap up the resync, and free the work item. */
int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

/* Deferred retry helper: re-runs drbd_resync_finished() from worker
 * context.  Queued by drbd_resync_finished() itself when
 * drbd_rs_del_all() could not complete immediately. */
static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

/* Ping the peer and block until the ping-ack arrives or the connection
 * state drops below C_CONNECTED. */
static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

/* Wrap up a finished resync or online-verify run: drain the resync LRU,
 * log throughput statistics, transition the state back to C_CONNECTED
 * (updating disk/pdsk and UUIDs as appropriate), and pick a user-mode
 * helper to invoke.  Retries itself via w_resync_finished() if the LRU
 * cannot be drained yet. */
int drbd_resync_finished(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	/* Throughput: dt in seconds (clamped to >= 1 to avoid division by
	 * zero), db in bitmap bits actually covered. */
	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
			(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, " %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device,
khelper_cmd);

	return 1;
}

/* helper: either park the peer request on net_ee (its pages are still
 * referenced by the network layer, e.g. sendpage() not finished) or free
 * it right away.  Adjusts the pp_in_use/pp_in_use_by_net accounting. */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		/* local read failed: answer with a negative ack instead */
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

/* Return true iff every byte of the request's page chain is zero
 * (scanned word-wise; @len never exceeds the chain's total size). */
static bool all_zero(struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned int len = peer_req->i.size;

	page_chain_for_each(page) {
		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
		unsigned int i, words = l / sizeof(long);
		unsigned long *d;

		d = kmap_atomic(page);
		for (i = 0; i < words; i++) {
			if (d[i]) {
				kunmap_atomic(d);
				return false;
			}
		}
		kunmap_atomic(d);
		len -= l;
	}

	return true;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		/* ahead/behind mode: cancel instead of answering */
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(device);
			/* thin resync: an all-zero block can be reported as
			 * deallocated instead of shipping the payload */
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			if (__ratelimit(&drbd_ratelimit_state))
				drbd_err(device, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		/* local read failed: negative ack + failure accounting */
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
Gruenbacher drbd_free_peer_req(device, peer_req); 1193b30ab791SAndreas Gruenbacher dec_unacked(device); 119499920dc5SAndreas Gruenbacher return 0; 1195b411b363SPhilipp Reisner } 1196b411b363SPhilipp Reisner 1197b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1198b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1199b30ab791SAndreas Gruenbacher put_ldev(device); 12001d53f09eSLars Ellenberg } 1201b411b363SPhilipp Reisner 1202db830c46SAndreas Gruenbacher di = peer_req->digest; 1203b411b363SPhilipp Reisner 1204db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1205b411b363SPhilipp Reisner /* quick hack to try to avoid a race against reconfiguration. 1206b411b363SPhilipp Reisner * a real fix would be much more involved, 1207b411b363SPhilipp Reisner * introducing more locking mechanisms */ 12086780139cSAndreas Gruenbacher if (peer_device->connection->csums_tfm) { 12093d0e6375SKees Cook digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); 12100b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1211b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1212b411b363SPhilipp Reisner } 1213b411b363SPhilipp Reisner if (digest) { 12146780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); 1215b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1216b411b363SPhilipp Reisner kfree(digest); 1217b411b363SPhilipp Reisner } 1218b411b363SPhilipp Reisner 1219b411b363SPhilipp Reisner if (eq) { 1220b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); 1221676396d5SLars Ellenberg /* rs_same_csums unit is BM_BLOCK_SIZE */ 1222b30ab791SAndreas Gruenbacher device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; 12236780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); 1224b411b363SPhilipp Reisner } else { 1225b30ab791SAndreas 
Gruenbacher inc_rs_pending(device); 1226db830c46SAndreas Gruenbacher peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ 1227db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ 1228204bba99SPhilipp Reisner kfree(di); 12296780139cSAndreas Gruenbacher err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); 1230b411b363SPhilipp Reisner } 1231b411b363SPhilipp Reisner } else { 12326780139cSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); 1233b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1234d0180171SAndreas Gruenbacher drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); 1235b411b363SPhilipp Reisner } 1236b411b363SPhilipp Reisner 1237b30ab791SAndreas Gruenbacher dec_unacked(device); 1238b30ab791SAndreas Gruenbacher move_to_net_ee_or_free(device, peer_req); 1239b411b363SPhilipp Reisner 124099920dc5SAndreas Gruenbacher if (unlikely(err)) 1241d0180171SAndreas Gruenbacher drbd_err(device, "drbd_send_block/ack() failed\n"); 124299920dc5SAndreas Gruenbacher return err; 1243b411b363SPhilipp Reisner } 1244b411b363SPhilipp Reisner 124599920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel) 1246b411b363SPhilipp Reisner { 1247a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 12486780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 12496780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1250db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 1251db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 1252b411b363SPhilipp Reisner int digest_size; 1253b411b363SPhilipp Reisner void *digest; 125499920dc5SAndreas Gruenbacher int err = 0; 1255b411b363SPhilipp Reisner 1256b411b363SPhilipp Reisner if (unlikely(cancel)) 1257b411b363SPhilipp 
Reisner goto out; 1258b411b363SPhilipp Reisner 12593d0e6375SKees Cook digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); 1260b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 12618f21420eSPhilipp Reisner if (!digest) { 126299920dc5SAndreas Gruenbacher err = 1; /* terminate the connection in case the allocation failed */ 12638f21420eSPhilipp Reisner goto out; 12648f21420eSPhilipp Reisner } 12658f21420eSPhilipp Reisner 1266db830c46SAndreas Gruenbacher if (likely(!(peer_req->flags & EE_WAS_ERROR))) 12676780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 12688f21420eSPhilipp Reisner else 12698f21420eSPhilipp Reisner memset(digest, 0, digest_size); 12708f21420eSPhilipp Reisner 127153ea4331SLars Ellenberg /* Free e and pages before send. 127253ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 127353ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 127453ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1275c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ 1276b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1277db830c46SAndreas Gruenbacher peer_req = NULL; 1278b30ab791SAndreas Gruenbacher inc_rs_pending(device); 12796780139cSAndreas Gruenbacher err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); 128099920dc5SAndreas Gruenbacher if (err) 1281b30ab791SAndreas Gruenbacher dec_rs_pending(device); 1282b411b363SPhilipp Reisner kfree(digest); 1283b411b363SPhilipp Reisner 1284b411b363SPhilipp Reisner out: 1285db830c46SAndreas Gruenbacher if (peer_req) 1286b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1287b30ab791SAndreas Gruenbacher dec_unacked(device); 128899920dc5SAndreas Gruenbacher return err; 1289b411b363SPhilipp Reisner } 1290b411b363SPhilipp Reisner 1291b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) 1292b411b363SPhilipp Reisner { 1293b30ab791SAndreas Gruenbacher if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { 1294b30ab791SAndreas Gruenbacher device->ov_last_oos_size += size>>9; 1295b411b363SPhilipp Reisner } else { 1296b30ab791SAndreas Gruenbacher device->ov_last_oos_start = sector; 1297b30ab791SAndreas Gruenbacher device->ov_last_oos_size = size>>9; 1298b411b363SPhilipp Reisner } 1299b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 1300b411b363SPhilipp Reisner } 1301b411b363SPhilipp Reisner 130299920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel) 1303b411b363SPhilipp Reisner { 1304a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); 13056780139cSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 13066780139cSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1307b411b363SPhilipp Reisner struct digest_info *di; 1308b411b363SPhilipp Reisner void *digest; 1309db830c46SAndreas Gruenbacher 
sector_t sector = peer_req->i.sector; 1310db830c46SAndreas Gruenbacher unsigned int size = peer_req->i.size; 131153ea4331SLars Ellenberg int digest_size; 131299920dc5SAndreas Gruenbacher int err, eq = 0; 131358ffa580SLars Ellenberg bool stop_sector_reached = false; 1314b411b363SPhilipp Reisner 1315b411b363SPhilipp Reisner if (unlikely(cancel)) { 1316b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1317b30ab791SAndreas Gruenbacher dec_unacked(device); 131899920dc5SAndreas Gruenbacher return 0; 1319b411b363SPhilipp Reisner } 1320b411b363SPhilipp Reisner 1321b411b363SPhilipp Reisner /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 1322b411b363SPhilipp Reisner * the resync lru has been cleaned up already */ 1323b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1324b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, peer_req->i.sector); 1325b30ab791SAndreas Gruenbacher put_ldev(device); 13261d53f09eSLars Ellenberg } 1327b411b363SPhilipp Reisner 1328db830c46SAndreas Gruenbacher di = peer_req->digest; 1329b411b363SPhilipp Reisner 1330db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 13313d0e6375SKees Cook digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); 1332b411b363SPhilipp Reisner digest = kmalloc(digest_size, GFP_NOIO); 1333b411b363SPhilipp Reisner if (digest) { 13346780139cSAndreas Gruenbacher drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); 1335b411b363SPhilipp Reisner 13360b0ba1efSAndreas Gruenbacher D_ASSERT(device, digest_size == di->digest_size); 1337b411b363SPhilipp Reisner eq = !memcmp(digest, di->digest, digest_size); 1338b411b363SPhilipp Reisner kfree(digest); 1339b411b363SPhilipp Reisner } 1340b411b363SPhilipp Reisner } 1341b411b363SPhilipp Reisner 13429676c760SLars Ellenberg /* Free peer_req and pages before send. 
134353ea4331SLars Ellenberg * In case we block on congestion, we could otherwise run into 134453ea4331SLars Ellenberg * some distributed deadlock, if the other side blocks on 134553ea4331SLars Ellenberg * congestion as well, because our receiver blocks in 1346c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages due to pp_in_use > max_buffers. */ 1347b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 1348b411b363SPhilipp Reisner if (!eq) 1349b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 1350b411b363SPhilipp Reisner else 1351b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1352b411b363SPhilipp Reisner 13536780139cSAndreas Gruenbacher err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, 1354b411b363SPhilipp Reisner eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); 1355b411b363SPhilipp Reisner 1356b30ab791SAndreas Gruenbacher dec_unacked(device); 1357b411b363SPhilipp Reisner 1358b30ab791SAndreas Gruenbacher --device->ov_left; 1359ea5442afSLars Ellenberg 1360ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 1361b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 1362b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 1363ea5442afSLars Ellenberg 1364b30ab791SAndreas Gruenbacher stop_sector_reached = verify_can_do_stop_sector(device) && 1365b30ab791SAndreas Gruenbacher (sector + (size>>9)) >= device->ov_stop_sector; 136658ffa580SLars Ellenberg 1367b30ab791SAndreas Gruenbacher if (device->ov_left == 0 || stop_sector_reached) { 1368b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 1369b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 1370b411b363SPhilipp Reisner } 1371b411b363SPhilipp Reisner 137299920dc5SAndreas Gruenbacher return err; 1373b411b363SPhilipp Reisner } 1374b411b363SPhilipp Reisner 1375b6dd1a89SLars Ellenberg /* FIXME 1376b6dd1a89SLars Ellenberg * We need to track the number of pending barrier acks, 
1377b6dd1a89SLars Ellenberg * and to be able to wait for them. 1378b6dd1a89SLars Ellenberg * See also comment in drbd_adm_attach before drbd_suspend_io. 1379b6dd1a89SLars Ellenberg */ 1380bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection) 1381b411b363SPhilipp Reisner { 13829f5bdc33SAndreas Gruenbacher struct p_barrier *p; 1383b6dd1a89SLars Ellenberg struct drbd_socket *sock; 1384b411b363SPhilipp Reisner 1385bde89a9eSAndreas Gruenbacher sock = &connection->data; 1386bde89a9eSAndreas Gruenbacher p = conn_prepare_command(connection, sock); 13879f5bdc33SAndreas Gruenbacher if (!p) 13889f5bdc33SAndreas Gruenbacher return -EIO; 1389bde89a9eSAndreas Gruenbacher p->barrier = connection->send.current_epoch_nr; 1390b6dd1a89SLars Ellenberg p->pad = 0; 1391bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 139284d34f2fSLars Ellenberg connection->send.last_sent_barrier_jif = jiffies; 1393b6dd1a89SLars Ellenberg 1394bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); 1395b411b363SPhilipp Reisner } 1396b411b363SPhilipp Reisner 1397c51a0ef3SLars Ellenberg static int pd_send_unplug_remote(struct drbd_peer_device *pd) 1398c51a0ef3SLars Ellenberg { 1399c51a0ef3SLars Ellenberg struct drbd_socket *sock = &pd->connection->data; 1400c51a0ef3SLars Ellenberg if (!drbd_prepare_command(pd, sock)) 1401c51a0ef3SLars Ellenberg return -EIO; 1402c51a0ef3SLars Ellenberg return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0); 1403c51a0ef3SLars Ellenberg } 1404c51a0ef3SLars Ellenberg 140599920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel) 1406b411b363SPhilipp Reisner { 140784b8c06bSAndreas Gruenbacher struct drbd_device *device = 140884b8c06bSAndreas Gruenbacher container_of(w, struct drbd_device, unplug_work); 14099f5bdc33SAndreas Gruenbacher 1410b411b363SPhilipp Reisner if (cancel) 141199920dc5SAndreas Gruenbacher return 0; 1412c51a0ef3SLars 
Ellenberg return pd_send_unplug_remote(first_peer_device(device)); 1413b411b363SPhilipp Reisner } 1414b411b363SPhilipp Reisner 1415bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch) 14164eb9b3cbSLars Ellenberg { 1417bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) { 1418bde89a9eSAndreas Gruenbacher connection->send.seen_any_write_yet = true; 1419bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 1420bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes = 0; 142184d34f2fSLars Ellenberg connection->send.last_sent_barrier_jif = jiffies; 14224eb9b3cbSLars Ellenberg } 14234eb9b3cbSLars Ellenberg } 14244eb9b3cbSLars Ellenberg 1425bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) 14264eb9b3cbSLars Ellenberg { 14274eb9b3cbSLars Ellenberg /* re-init if first write on this connection */ 1428bde89a9eSAndreas Gruenbacher if (!connection->send.seen_any_write_yet) 14294eb9b3cbSLars Ellenberg return; 1430bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_nr != epoch) { 1431bde89a9eSAndreas Gruenbacher if (connection->send.current_epoch_writes) 1432bde89a9eSAndreas Gruenbacher drbd_send_barrier(connection); 1433bde89a9eSAndreas Gruenbacher connection->send.current_epoch_nr = epoch; 14344eb9b3cbSLars Ellenberg } 14354eb9b3cbSLars Ellenberg } 14364eb9b3cbSLars Ellenberg 14378f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel) 143873a01a18SPhilipp Reisner { 143973a01a18SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 144084b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 144144a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 144244a4d551SLars Ellenberg struct drbd_connection *const connection = peer_device->connection; 144399920dc5SAndreas Gruenbacher 
int err; 144473a01a18SPhilipp Reisner 144573a01a18SPhilipp Reisner if (unlikely(cancel)) { 14468554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 144799920dc5SAndreas Gruenbacher return 0; 144873a01a18SPhilipp Reisner } 1449e5f891b2SLars Ellenberg req->pre_send_jif = jiffies; 145073a01a18SPhilipp Reisner 1451bde89a9eSAndreas Gruenbacher /* this time, no connection->send.current_epoch_writes++; 1452b6dd1a89SLars Ellenberg * If it was sent, it was the closing barrier for the last 1453b6dd1a89SLars Ellenberg * replicated epoch, before we went into AHEAD mode. 1454b6dd1a89SLars Ellenberg * No more barriers will be sent, until we leave AHEAD mode again. */ 1455bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1456b6dd1a89SLars Ellenberg 145744a4d551SLars Ellenberg err = drbd_send_out_of_sync(peer_device, req); 14588554df1cSAndreas Gruenbacher req_mod(req, OOS_HANDED_TO_NETWORK); 145973a01a18SPhilipp Reisner 146099920dc5SAndreas Gruenbacher return err; 146173a01a18SPhilipp Reisner } 146273a01a18SPhilipp Reisner 1463b411b363SPhilipp Reisner /** 1464b411b363SPhilipp Reisner * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 1465b411b363SPhilipp Reisner * @w: work object. 
1466b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1467b411b363SPhilipp Reisner */ 146899920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel) 1469b411b363SPhilipp Reisner { 1470b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 147184b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 147244a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 147344a4d551SLars Ellenberg struct drbd_connection *connection = peer_device->connection; 1474c51a0ef3SLars Ellenberg bool do_send_unplug = req->rq_state & RQ_UNPLUG; 147599920dc5SAndreas Gruenbacher int err; 1476b411b363SPhilipp Reisner 1477b411b363SPhilipp Reisner if (unlikely(cancel)) { 14788554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 147999920dc5SAndreas Gruenbacher return 0; 1480b411b363SPhilipp Reisner } 1481e5f891b2SLars Ellenberg req->pre_send_jif = jiffies; 1482b411b363SPhilipp Reisner 1483bde89a9eSAndreas Gruenbacher re_init_if_first_write(connection, req->epoch); 1484bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1485bde89a9eSAndreas Gruenbacher connection->send.current_epoch_writes++; 1486b6dd1a89SLars Ellenberg 148744a4d551SLars Ellenberg err = drbd_send_dblock(peer_device, req); 148899920dc5SAndreas Gruenbacher req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1489b411b363SPhilipp Reisner 1490c51a0ef3SLars Ellenberg if (do_send_unplug && !err) 1491c51a0ef3SLars Ellenberg pd_send_unplug_remote(peer_device); 1492c51a0ef3SLars Ellenberg 149399920dc5SAndreas Gruenbacher return err; 1494b411b363SPhilipp Reisner } 1495b411b363SPhilipp Reisner 1496b411b363SPhilipp Reisner /** 1497b411b363SPhilipp Reisner * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet 1498b411b363SPhilipp Reisner * @w: work object. 
1499b411b363SPhilipp Reisner * @cancel: The connection will be closed anyways 1500b411b363SPhilipp Reisner */ 150199920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel) 1502b411b363SPhilipp Reisner { 1503b411b363SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 150484b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 150544a4d551SLars Ellenberg struct drbd_peer_device *const peer_device = first_peer_device(device); 150644a4d551SLars Ellenberg struct drbd_connection *connection = peer_device->connection; 1507c51a0ef3SLars Ellenberg bool do_send_unplug = req->rq_state & RQ_UNPLUG; 150899920dc5SAndreas Gruenbacher int err; 1509b411b363SPhilipp Reisner 1510b411b363SPhilipp Reisner if (unlikely(cancel)) { 15118554df1cSAndreas Gruenbacher req_mod(req, SEND_CANCELED); 151299920dc5SAndreas Gruenbacher return 0; 1513b411b363SPhilipp Reisner } 1514e5f891b2SLars Ellenberg req->pre_send_jif = jiffies; 1515b411b363SPhilipp Reisner 1516b6dd1a89SLars Ellenberg /* Even read requests may close a write epoch, 1517b6dd1a89SLars Ellenberg * if there was any yet. */ 1518bde89a9eSAndreas Gruenbacher maybe_send_barrier(connection, req->epoch); 1519b6dd1a89SLars Ellenberg 152044a4d551SLars Ellenberg err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, 1521b411b363SPhilipp Reisner (unsigned long)req); 1522b411b363SPhilipp Reisner 152399920dc5SAndreas Gruenbacher req_mod(req, err ? 
SEND_FAILED : HANDED_OVER_TO_NETWORK); 1524b411b363SPhilipp Reisner 1525c51a0ef3SLars Ellenberg if (do_send_unplug && !err) 1526c51a0ef3SLars Ellenberg pd_send_unplug_remote(peer_device); 1527c51a0ef3SLars Ellenberg 152899920dc5SAndreas Gruenbacher return err; 1529b411b363SPhilipp Reisner } 1530b411b363SPhilipp Reisner 153199920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel) 1532265be2d0SPhilipp Reisner { 1533265be2d0SPhilipp Reisner struct drbd_request *req = container_of(w, struct drbd_request, w); 153484b8c06bSAndreas Gruenbacher struct drbd_device *device = req->device; 1535265be2d0SPhilipp Reisner 15360778286aSPhilipp Reisner if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 15374dd726f0SLars Ellenberg drbd_al_begin_io(device, &req->i); 1538265be2d0SPhilipp Reisner 1539265be2d0SPhilipp Reisner drbd_req_make_private_bio(req, req->master_bio); 154074d46992SChristoph Hellwig bio_set_dev(req->private_bio, device->ldev->backing_bdev); 1541265be2d0SPhilipp Reisner generic_make_request(req->private_bio); 1542265be2d0SPhilipp Reisner 154399920dc5SAndreas Gruenbacher return 0; 1544265be2d0SPhilipp Reisner } 1545265be2d0SPhilipp Reisner 1546b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device) 1547b411b363SPhilipp Reisner { 1548b30ab791SAndreas Gruenbacher struct drbd_device *odev = device; 154995f8efd0SAndreas Gruenbacher int resync_after; 1550b411b363SPhilipp Reisner 1551b411b363SPhilipp Reisner while (1) { 1552a3f8f7dcSLars Ellenberg if (!odev->ldev || odev->state.disk == D_DISKLESS) 1553438c8374SPhilipp Reisner return 1; 1554daeda1ccSPhilipp Reisner rcu_read_lock(); 155595f8efd0SAndreas Gruenbacher resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; 1556daeda1ccSPhilipp Reisner rcu_read_unlock(); 155795f8efd0SAndreas Gruenbacher if (resync_after == -1) 1558b411b363SPhilipp Reisner return 1; 1559b30ab791SAndreas Gruenbacher odev = minor_to_device(resync_after); 
1560a3f8f7dcSLars Ellenberg if (!odev) 1561841ce241SAndreas Gruenbacher return 1; 1562b411b363SPhilipp Reisner if ((odev->state.conn >= C_SYNC_SOURCE && 1563b411b363SPhilipp Reisner odev->state.conn <= C_PAUSED_SYNC_T) || 1564b411b363SPhilipp Reisner odev->state.aftr_isp || odev->state.peer_isp || 1565b411b363SPhilipp Reisner odev->state.user_isp) 1566b411b363SPhilipp Reisner return 0; 1567b411b363SPhilipp Reisner } 1568b411b363SPhilipp Reisner } 1569b411b363SPhilipp Reisner 1570b411b363SPhilipp Reisner /** 157128bc3b8cSAndreas Gruenbacher * drbd_pause_after() - Pause resync on all devices that may not resync now 1572b30ab791SAndreas Gruenbacher * @device: DRBD device. 1573b411b363SPhilipp Reisner * 1574b411b363SPhilipp Reisner * Called from process context only (admin command and after_state_ch). 1575b411b363SPhilipp Reisner */ 157628bc3b8cSAndreas Gruenbacher static bool drbd_pause_after(struct drbd_device *device) 1577b411b363SPhilipp Reisner { 157828bc3b8cSAndreas Gruenbacher bool changed = false; 157954761697SAndreas Gruenbacher struct drbd_device *odev; 158028bc3b8cSAndreas Gruenbacher int i; 1581b411b363SPhilipp Reisner 1582695d08faSPhilipp Reisner rcu_read_lock(); 158305a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1584b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1585b411b363SPhilipp Reisner continue; 158628bc3b8cSAndreas Gruenbacher if (!_drbd_may_sync_now(odev) && 158728bc3b8cSAndreas Gruenbacher _drbd_set_state(_NS(odev, aftr_isp, 1), 158828bc3b8cSAndreas Gruenbacher CS_HARD, NULL) != SS_NOTHING_TO_DO) 158928bc3b8cSAndreas Gruenbacher changed = true; 1590b411b363SPhilipp Reisner } 1591695d08faSPhilipp Reisner rcu_read_unlock(); 1592b411b363SPhilipp Reisner 159328bc3b8cSAndreas Gruenbacher return changed; 1594b411b363SPhilipp Reisner } 1595b411b363SPhilipp Reisner 1596b411b363SPhilipp Reisner /** 159728bc3b8cSAndreas Gruenbacher * drbd_resume_next() - Resume resync on all 
devices that may resync now 1598b30ab791SAndreas Gruenbacher * @device: DRBD device. 1599b411b363SPhilipp Reisner * 1600b411b363SPhilipp Reisner * Called from process context only (admin command and worker). 1601b411b363SPhilipp Reisner */ 160228bc3b8cSAndreas Gruenbacher static bool drbd_resume_next(struct drbd_device *device) 1603b411b363SPhilipp Reisner { 160428bc3b8cSAndreas Gruenbacher bool changed = false; 160554761697SAndreas Gruenbacher struct drbd_device *odev; 160628bc3b8cSAndreas Gruenbacher int i; 1607b411b363SPhilipp Reisner 1608695d08faSPhilipp Reisner rcu_read_lock(); 160905a10ec7SAndreas Gruenbacher idr_for_each_entry(&drbd_devices, odev, i) { 1610b411b363SPhilipp Reisner if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) 1611b411b363SPhilipp Reisner continue; 1612b411b363SPhilipp Reisner if (odev->state.aftr_isp) { 161328bc3b8cSAndreas Gruenbacher if (_drbd_may_sync_now(odev) && 161428bc3b8cSAndreas Gruenbacher _drbd_set_state(_NS(odev, aftr_isp, 0), 161528bc3b8cSAndreas Gruenbacher CS_HARD, NULL) != SS_NOTHING_TO_DO) 161628bc3b8cSAndreas Gruenbacher changed = true; 1617b411b363SPhilipp Reisner } 1618b411b363SPhilipp Reisner } 1619695d08faSPhilipp Reisner rcu_read_unlock(); 162028bc3b8cSAndreas Gruenbacher return changed; 1621b411b363SPhilipp Reisner } 1622b411b363SPhilipp Reisner 1623b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device) 1624b411b363SPhilipp Reisner { 162528bc3b8cSAndreas Gruenbacher lock_all_resources(); 162628bc3b8cSAndreas Gruenbacher drbd_resume_next(device); 162728bc3b8cSAndreas Gruenbacher unlock_all_resources(); 1628b411b363SPhilipp Reisner } 1629b411b363SPhilipp Reisner 1630b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device) 1631b411b363SPhilipp Reisner { 163228bc3b8cSAndreas Gruenbacher lock_all_resources(); 163328bc3b8cSAndreas Gruenbacher drbd_pause_after(device); 163428bc3b8cSAndreas Gruenbacher unlock_all_resources(); 1635b411b363SPhilipp Reisner } 
/* Validate a proposed resync-after dependency on minor @o_minor.
 * Walks the dependency chain to reject cycles; depending on diskless or
 * non-existing minors is explicitly allowed.
 * caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
	struct drbd_device *odev;
	int resync_after;

	/* -1 means "no dependency", always valid */
	if (o_minor == -1)
		return NO_ERROR;
	if (o_minor < -1 || o_minor > MINORMASK)
		return ERR_RESYNC_AFTER;

	/* check for loops */
	odev = minor_to_device(o_minor);
	while (1) {
		if (odev == device)
			return ERR_RESYNC_AFTER_CYCLE;

		/* You are free to depend on diskless, non-existing,
		 * or not yet/no longer existing minors.
		 * We only reject dependency loops.
		 * We cannot follow the dependency chain beyond a detached or
		 * missing minor.
		 */
		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
			return NO_ERROR;

		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		/* dependency chain ends here, no cycles. */
		if (resync_after == -1)
			return NO_ERROR;

		/* follow the dependency chain */
		odev = minor_to_device(resync_after);
	}
}

/* Re-evaluate pause/resume of resyncs after a resync-after dependency
 * changed; iterate until no further state transition results.
 * caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device)
{
	int changed;

	do {
		changed  = drbd_pause_after(device);
		changed |= drbd_resume_next(device);
	} while (changed);
}

/* Reset the resync-rate controller bookkeeping and empty the fifo plan. */
void drbd_rs_controller_reset(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	struct fifo_buffer *plan;

	atomic_set(&device->rs_sect_in, 0);
	atomic_set(&device->rs_sect_ev, 0);
	device->rs_in_flight = 0;
	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);

	/* Updating the RCU protected object in place is necessary since
	   this function gets called from atomic context.
	   It is valid since all other updates also lead to an completely
	   empty fifo */
	rcu_read_lock();
	plan = rcu_dereference(device->rs_plan_s);
	plan->total = 0;
	fifo_set(plan, 0);
	rcu_read_unlock();
}

/* Timer callback: defer the actual resync start to the worker thread. */
void start_resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, start_resync_timer);
	drbd_device_post_work(device, RS_START);
}

/* Worker-context resync start; re-arms the timer while unacked or pending
 * resync requests are still outstanding. */
static void do_start_resync(struct drbd_device *device)
{
	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
		drbd_warn(device, "postponing start_resync ...\n");
		/* retry shortly; the counters should drain soon */
		device->start_resync_timer.expires = jiffies + HZ/10;
		add_timer(&device->start_resync_timer);
		return;
	}

	drbd_start_resync(device, C_SYNC_SOURCE);
	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
}

/* Decide whether this resync may use checksum-based block comparison. */
static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
{
	bool csums_after_crash_only;
	rcu_read_lock();
	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
	rcu_read_unlock();
	return connection->agreed_pro_version >= 89 &&		/* supported? */
		connection->csums_tfm &&			/* configured? */
		(csums_after_crash_only == false	/* use for each resync? */
		 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
}

/**
 * drbd_start_resync() - Start the resync process
 * @device:	DRBD device.
 * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	union drbd_state ns;
	int r;

	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
		drbd_err(device, "Resync already running!\n");
		return;
	}

	if (!connection) {
		drbd_err(device, "No connection to peer, aborting!\n");
		return;
	}

	/* Run the before-resync-* userspace handlers once, unless they were
	 * already run for this attempt (B_RS_H_DONE). */
	if (!test_bit(B_RS_H_DONE, &device->flags)) {
		if (side == C_SYNC_TARGET) {
			/* Since application IO was locked out during C_WF_BITMAP_T and
			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
			   we check that we might make the data inconsistent. */
			r = drbd_khelper(device, "before-resync-target");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				drbd_info(device, "before-resync-target handler returned %d, "
					 "dropping connection.\n", r);
				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return;
			}
		} else /* C_SYNC_SOURCE */ {
			r = drbd_khelper(device, "before-resync-source");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				if (r == 3) {
					/* exit code 3 is tolerated for backward compat */
					drbd_info(device, "before-resync-source handler returned %d, "
						 "ignoring. Old userland tools?", r);
				} else {
					drbd_info(device, "before-resync-source handler returned %d, "
						 "dropping connection.\n", r);
					conn_request_state(connection,
							   NS(conn, C_DISCONNECTING), CS_HARD);
					return;
				}
			}
		}
	}

	if (current == connection->worker.task) {
		/* The worker should not sleep waiting for state_mutex,
		   that can take long */
		if (!mutex_trylock(device->state_mutex)) {
			/* remember that the handlers already ran, retry via timer */
			set_bit(B_RS_H_DONE, &device->flags);
			device->start_resync_timer.expires = jiffies + HZ/5;
			add_timer(&device->start_resync_timer);
			return;
		}
	} else {
		mutex_lock(device->state_mutex);
	}

	lock_all_resources();
	clear_bit(B_RS_H_DONE, &device->flags);
	/* Did some connection breakage or IO error race with us? */
	if (device->state.conn < C_CONNECTED
	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
		unlock_all_resources();
		goto out;
	}

	ns = drbd_read_state(device);

	ns.aftr_isp = !_drbd_may_sync_now(device);

	ns.conn = side;

	if (side == C_SYNC_TARGET)
		ns.disk = D_INCONSISTENT;
	else /* side == C_SYNC_SOURCE */
		ns.pdsk = D_INCONSISTENT;

	r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
	ns = drbd_read_state(device);

	if (ns.conn < C_CONNECTED)
		r = SS_UNKNOWN_ERROR;

	if (r == SS_SUCCESS) {
		/* initialize the per-resync progress counters/marks */
		unsigned long tw = drbd_bm_total_weight(device);
		unsigned long now = jiffies;
		int i;

		device->rs_failed    = 0;
		device->rs_paused    = 0;
		device->rs_same_csum = 0;
		device->rs_last_sect_ev = 0;
		device->rs_total     = tw;
		device->rs_start     = now;
		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			device->rs_mark_left[i] = tw;
			device->rs_mark_time[i] = now;
		}
		drbd_pause_after(device);
		/* Forget potentially stale cached per resync extent bit-counts.
		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
		 * disabled, and know the disk state is ok. */
		spin_lock(&device->al_lock);
		lc_reset(device->resync);
		device->resync_locked = 0;
		device->resync_wenr = LC_FREE;
		spin_unlock(&device->al_lock);
	}
	unlock_all_resources();

	if (r == SS_SUCCESS) {
		wake_up(&device->al_wait); /* for lc_reset() above */
		/* reset rs_last_bcast when a resync or verify is started,
		 * to deal with potential jiffies wrap. */
		device->rs_last_bcast = jiffies - HZ;

		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
		     drbd_conn_str(ns.conn),
		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
		     (unsigned long) device->rs_total);
		if (side == C_SYNC_TARGET) {
			device->bm_resync_fo = 0;
			device->use_csums = use_checksum_based_resync(connection, device);
		} else {
			device->use_csums = false;
		}

		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
		 * with w_send_oos, or the sync target will get confused as to
		 * how much bits to resync.  We cannot do that always, because for an
		 * empty resync and protocol < 95, we need to do it here, as we call
		 * drbd_resync_finished from here in that case.
		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
		 * and from after_state_ch otherwise. */
		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
			drbd_gen_and_send_sync_uuid(peer_device);

		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
			/* This still has a race (about when exactly the peers
			 * detect connection loss) that can lead to a full sync
			 * on next handshake. In 8.3.9 we fixed this with explicit
			 * resync-finished notifications, but the fix
			 * introduces a protocol change.  Sleeping for some
			 * time longer than the ping interval + timeout on the
			 * SyncSource, to give the SyncTarget the chance to
			 * detect connection loss, then waiting for a ping
			 * response (implicit in drbd_resync_finished) reduces
			 * the race considerably, but does not solve it. */
			if (side == C_SYNC_SOURCE) {
				struct net_conf *nc;
				int timeo;

				rcu_read_lock();
				nc = rcu_dereference(connection->net_conf);
				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
				rcu_read_unlock();
				schedule_timeout_interruptible(timeo);
			}
			drbd_resync_finished(device);
		}

		drbd_rs_controller_reset(device);
		/* ns.conn may already be != device->state.conn,
		 * we may have been paused in between, or become paused until
		 * the timer triggers.
		 * No matter, that is handled in resync_timer_fn() */
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&device->resync_timer, jiffies);

		drbd_md_sync(device);
	}
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
}

/* Lazily write out the on-disk bitmap and broadcast sync progress to
 * userspace; when @resync_done and we are still in a sync state, finish
 * the resync as well. */
static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
{
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
	device->rs_last_bcast = jiffies;

	if (!get_ldev(device))
		return;

	drbd_bm_write_lazy(device, 0);
	if (resync_done && is_sync_state(device->state.conn))
		drbd_resync_finished(device);

	drbd_bcast_event(device, &sib);
	/* update timestamp, in case it took a while to write out stuff */
	device->rs_last_bcast = jiffies;
	put_ldev(device);
}

/* Tear down the local backing device resources (resync/activity-log LRU
 * caches and the backing dev itself) and wake anyone waiting on it. */
static void drbd_ldev_destroy(struct drbd_device *device)
{
	lc_destroy(device->resync);
	device->resync = NULL;
	lc_destroy(device->act_log);
	device->act_log = NULL;

	__acquire(local);	/* sparse annotation: balances the implied get_ldev */
	drbd_backing_dev_free(device, device->ldev);
	device->ldev = NULL;
	__release(local);

	clear_bit(GOING_DISKLESS, &device->flags);
	wake_up(&device->misc_wait);
}

/* Transition a D_FAILED disk to D_DISKLESS, first flushing bitmap pages
 * and recording MDF_FULL_SYNC if a read error had occurred. */
static void go_diskless(struct drbd_device *device)
{
	D_ASSERT(device, device->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */

	/* Try to write changed bitmap pages, read errors may have just
	 * set some bits outside the area covered by the activity log.
	 *
	 * If we have an IO error during the bitmap writeout,
	 * we will want a full sync next time, just in case.
	 * (Do we want a specific meta data flag for this?)
	 *
	 * If that does not make it to stable storage either,
	 * we cannot do anything about that anymore.
	 *
	 * We still need to check if both bitmap and ldev are present, we may
	 * end up here after a failed attach, before ldev was even assigned.
	 */
	if (device->bitmap && device->ldev) {
		/* An interrupted resync or similar is allowed to recounts bits
		 * while we detach.
		 * Any modifications would not be expected anymore, though.
		 */
		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
					"detach", BM_LOCKED_TEST_ALLOWED)) {
			if (test_bit(WAS_READ_ERROR, &device->flags)) {
				drbd_md_set_flag(device, MDF_FULL_SYNC);
				drbd_md_sync(device);
			}
		}
	}

	drbd_force_state(device, NS(disk, D_DISKLESS));
}

/* Worker-side handler for an expired md_sync_timer: write out meta data. */
static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);
	return 0;
}

/* Record one entry in the worker's timing-details ring buffer and clear
 * the following slot as the end-of-history marker.
 * only called from drbd_worker thread, no locking */
void __update_timing_details(
		struct drbd_thread_timing_details *tdp,
		unsigned int *cb_nr,
		void *cb,
		const char *fn, const unsigned int line)
{
	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
	struct drbd_thread_timing_details *td = tdp + i;

	td->start_jif = jiffies;
	td->cb_addr = cb;
	td->caller_fn = fn;
	td->line = line;
	td->cb_nr = *cb_nr;

	/* zero the next slot so readers can spot the ring's current end */
	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
	td = tdp + i;
	memset(td, 0, sizeof(*td));

	++(*cb_nr);
}

/* Dispatch the per-device work bits collected in device->flags. */
static void do_device_work(struct drbd_device *device, const unsigned long todo)
{
	if (test_bit(MD_SYNC, &todo))
		do_md_sync(device);
	if (test_bit(RS_DONE, &todo) ||
	    test_bit(RS_PROGRESS, &todo))
		update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
	if (test_bit(GO_DISKLESS, &todo))
		go_diskless(device);
	if (test_bit(DESTROY_DISK, &todo))
		drbd_ldev_destroy(device);
	if (test_bit(RS_START, &todo))
		do_start_resync(device);
}

/* Flag bits in device->flags that represent queued device work. */
#define DRBD_DEVICE_WORK_MASK	\
	((1UL << GO_DISKLESS)	\
	|(1UL << DESTROY_DISK)	\
	|(1UL << MD_SYNC)	\
	|(1UL << RS_START)	\
	|(1UL << RS_PROGRESS)	\
	|(1UL << RS_DONE)	\
	)

/* Atomically fetch-and-clear the work bits from @flags via cmpxchg. */
static unsigned long get_work_bits(unsigned long *flags)
{
	unsigned long old, new;
	do {
		old = *flags;
		new = old & ~DRBD_DEVICE_WORK_MASK;
	} while (cmpxchg(flags, old, new) != old);
	return old & DRBD_DEVICE_WORK_MASK;
}

/* Run pending device work for every peer device of @connection.
 * Drops the RCU read lock around do_device_work(), holding a kref on the
 * device so it cannot go away meanwhile. */
static void do_unqueued_work(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		unsigned long todo = get_work_bits(&device->flags);
		if (!todo)
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		do_device_work(device, todo);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

/* Move all queued work items from @queue onto @work_list.
 * Returns true if @work_list is non-empty afterwards. */
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
	spin_lock_irq(&queue->q_lock);
	list_splice_tail_init(&queue->q, work_list);
	spin_unlock_irq(&queue->q_lock);
	return !list_empty(work_list);
}

/* Wait until sender work arrives on @connection, meanwhile taking care of
 * TCP corking and of sending the epoch-separating barrier when the current
 * transfer-log epoch has moved past the last communicated one. */
static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
{
	DEFINE_WAIT(wait);
	struct net_conf *nc;
	int uncork, cork;

	dequeue_work_batch(&connection->sender_work, work_list);
	if (!list_empty(work_list))
		return;

	/* Still nothing to do?
	 * Maybe we still need to close the current epoch,
	 * even if no new requests are queued yet.
	 *
	 * Also, poke TCP, just in case.
	 * Then wait for new work (or signal). */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	uncork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	if (uncork) {
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			drbd_tcp_uncork(connection->data.socket);
		mutex_unlock(&connection->data.mutex);
	}

	for (;;) {
		int send_barrier;
		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
		spin_lock_irq(&connection->resource->req_lock);
		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(&connection->sender_work.q))
			list_splice_tail_init(&connection->sender_work.q, work_list);
		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(work_list) || signal_pending(current)) {
			spin_unlock_irq(&connection->resource->req_lock);
			break;
		}

		/* We found nothing new to do, no to-be-communicated request,
		 * no other work item.  We may still need to close the last
		 * epoch.  Next incoming request epoch will be connection ->
		 * current transfer log epoch number.  If that is different
		 * from the epoch of the last request we communicated, it is
		 * safe to send the epoch separating barrier now.
		 */
		send_barrier =
			atomic_read(&connection->current_tle_nr) !=
			connection->send.current_epoch_nr;
		spin_unlock_irq(&connection->resource->req_lock);

		if (send_barrier)
			maybe_send_barrier(connection,
					connection->send.current_epoch_nr + 1);

		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
			break;

		/* drbd_send() may have called flush_signals() */
		if (get_t_state(&connection->worker) != RUNNING)
			break;

		schedule();
		/* may be woken up for other things but new work, too,
		 * e.g. if the current epoch got closed.
		 * In which case we send the barrier above. */
	}
	finish_wait(&connection->sender_work.q_wait, &wait);

	/* someone may have changed the config while we have been waiting above. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	cork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	mutex_lock(&connection->data.mutex);
	if (connection->data.socket) {
		if (cork)
			drbd_tcp_cork(connection->data.socket);
		else if (!uncork)
			drbd_tcp_uncork(connection->data.socket);
	}
	mutex_unlock(&connection->data.mutex);
}

/* Main loop of the per-connection worker thread: wait for work, run
 * unqueued per-device work, execute queued work callbacks, and on exit
 * drain everything and clean up all devices of the connection. */
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct drbd_work *w = NULL;
	struct drbd_peer_device *peer_device;
	LIST_HEAD(work_list);
	int vnr;

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		if (list_empty(&work_list)) {
			update_worker_timing_details(connection, wait_for_work);
			wait_for_work(connection, &work_list);
		}

		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}

		if (signal_pending(current)) {
			flush_signals(current);
			if (get_t_state(thi) == RUNNING) {
				drbd_warn(connection, "Worker got an unexpected signal\n");
				continue;
			}
			break;
		}

		if (get_t_state(thi) != RUNNING)
			break;

		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
				continue;
			/* callback failed: drop the connection if still established */
			if (connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	/* drain remaining work (callbacks get "cancel" == 1) before exiting */
	do {
		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}
		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			w->cb(w, 1);
		} else
			dequeue_work_batch(&connection->sender_work, &work_list);
	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_device_cleanup(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}