1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_req.h 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2006-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner DRBD is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner DRBD is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner */ 24b411b363SPhilipp Reisner 25b411b363SPhilipp Reisner #ifndef _DRBD_REQ_H 26b411b363SPhilipp Reisner #define _DRBD_REQ_H 27b411b363SPhilipp Reisner 28b411b363SPhilipp Reisner #include <linux/module.h> 29b411b363SPhilipp Reisner 30b411b363SPhilipp Reisner #include <linux/slab.h> 31b411b363SPhilipp Reisner #include <linux/drbd.h> 32b411b363SPhilipp Reisner #include "drbd_int.h" 33b411b363SPhilipp Reisner #include "drbd_wrappers.h" 34b411b363SPhilipp Reisner 35b411b363SPhilipp Reisner /* The request callbacks will be called in irq context by the IDE drivers, 36b411b363SPhilipp Reisner and in Softirqs/Tasklets/BH context by the SCSI drivers, 37b411b363SPhilipp Reisner and by the receiver and worker in kernel-thread context. 38b411b363SPhilipp Reisner Try to get the locking right :) */ 39b411b363SPhilipp Reisner 40b411b363SPhilipp Reisner /* 41b411b363SPhilipp Reisner * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are 42b411b363SPhilipp Reisner * associated with IO requests originating from the block layer above us. 43b411b363SPhilipp Reisner * 44b411b363SPhilipp Reisner * There are quite a few things that may happen to a drbd request 45b411b363SPhilipp Reisner * during its lifetime. 46b411b363SPhilipp Reisner * 47b411b363SPhilipp Reisner * It will be created. 48b411b363SPhilipp Reisner * It will be marked with the intention to be 49b411b363SPhilipp Reisner * submitted to local disk and/or 50b411b363SPhilipp Reisner * send via the network. 51b411b363SPhilipp Reisner * 52b411b363SPhilipp Reisner * It has to be placed on the transfer log and other housekeeping lists, 53b411b363SPhilipp Reisner * In case we have a network connection. 54b411b363SPhilipp Reisner * 55b411b363SPhilipp Reisner * It may be identified as a concurrent (write) request 56b411b363SPhilipp Reisner * and be handled accordingly. 57b411b363SPhilipp Reisner * 58b411b363SPhilipp Reisner * It may be handed over to the local disk subsystem.
 * It may be completed by the local disk subsystem,
 * either successfully or with io-error.
 * In case it is a READ request, and it failed locally,
 * it may be retried remotely.
 *
 * It may be queued for sending.
 * It may be handed over to the network stack,
 * which may fail.
 * It may be acknowledged by the "peer" according to the wire_protocol in use.
 * this may be a negative ack.
 * It may receive a faked ack when the network connection is lost and the
 * transfer log is cleaned up.
 * Sending may be canceled due to network connection loss.
 * When it finally has outlived its time,
 * corresponding dirty bits in the resync-bitmap may be cleared or set,
 * it will be destroyed,
 * and completion will be signalled to the originator,
 * with or without "success".
 */

/* Events (and two "actions") in the life of a drbd_request.
 * They are fed to __req_mod()/req_mod(), which perform the corresponding
 * state transition on the request. */
enum drbd_req_event {
	created,
	to_be_send,		/* intent: will go out via the network */
	to_be_submitted,	/* intent: will be submitted to local disk */

	/* XXX yes, now I am inconsistent...
	 * these two are not "events" but "actions"
	 * oh, well... */
	queue_for_net_write,
	queue_for_net_read,

	send_canceled,
	send_failed,
	handed_over_to_network,
	connection_lost_while_pending,
	read_retry_remote_canceled,
	recv_acked_by_peer,		/* recv_ack: protocol B (see table below) */
	write_acked_by_peer,		/* write_ack: protocol C (see table below) */
	write_acked_by_peer_and_sis,	/* and set_in_sync */
	conflict_discarded_by_peer,
	neg_acked,
	barrier_acked, /* in protocol A and B */
	data_received, /* (remote read) */

	/* completion events reported by the local disk subsystem */
	read_completed_with_error,
	read_ahead_completed_with_error,
	write_completed_with_error,
	completed_ok,
	resend,
	nothing, /* for tracing only */
};

/* encoding of request states for now. we don't actually need that many bits.
 * we don't need to do atomic bit operations either, since most of the time we
 * need to look at the connection state and/or manipulate some lists at the
 * same time, so we should hold the request lock anyways.
 */
enum drbd_req_state_bits {
	/* Local-disk part of the state, bits 2..0 ("210"):
	 * 210
	 * 000: no local possible
	 * 001: to be submitted
	 *    UNUSED, we could map: 011: submitted, completion still pending
	 * 110: completed ok
	 * 010: completed with error
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,

	/* Network part of the state, bits 7..3 ("76543"):
	 * 76543
	 * 00000: no network possible
	 * 00001: to be send
	 * 00011: to be send, on worker queue
	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
	 * 11101: sent,
	 *        recv_ack (B) or implicit "ack" (A),
	 *        still waiting for the barrier ack.
	 *        master_bio may already be completed and invalidated.
	 * 11100: write_acked (C),
	 *        data_received (for remote read, any protocol)
	 *        or finally the barrier ack has arrived (B,A)...
	 *        request can be freed
	 * 01100: neg-acked (write, protocol C)
	 *        or neg-d-acked (read, any protocol)
	 *        or killed from the transfer log
	 *        during cleanup after connection loss
	 *        request can be freed
	 * 01000: canceled or send failed...
	 *        request can be freed
	 */

	/* if "SENT" is not set, yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transaction log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log. Currently we need this flag to avoid conflicts between
	 * worker canceling the request and tl_clear_barrier killing it from
	 * transfer log. We should restructure the code so this conflict does
	 * no longer occur. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same. before we drop it
	 * we must ensure that we can tell a request with network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last, its for the RQ_NET_MASK */
	__RQ_NET_MAX,

	/* Set when this is a write, clear for a read */
	__RQ_WRITE,
};

/* single-bit masks derived from the bit numbers above */
#define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)

#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */

#define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
#define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
#define RQ_NET_OK          (1UL << __RQ_NET_OK)
#define RQ_NET_SIS         (1UL << __RQ_NET_SIS)

/* all network bits; 0x1f8 */
#define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)

#define RQ_WRITE           (1UL << __RQ_WRITE)

21011b58e73SPhilipp Reisner /* For waking up the frozen transfer log mod_req() has to return if the request 21111b58e73SPhilipp Reisner should be counted in the epoch object*/ 21211b58e73SPhilipp Reisner #define MR_WRITE_SHIFT 0 21311b58e73SPhilipp Reisner #define MR_WRITE (1 << MR_WRITE_SHIFT) 21411b58e73SPhilipp Reisner #define MR_READ_SHIFT 1 21511b58e73SPhilipp Reisner #define MR_READ (1 << MR_READ_SHIFT) 21611b58e73SPhilipp Reisner 217b411b363SPhilipp Reisner /* epoch entries */ 218b411b363SPhilipp Reisner static inline 219b411b363SPhilipp Reisner struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) 220b411b363SPhilipp Reisner { 221b411b363SPhilipp Reisner BUG_ON(mdev->ee_hash_s == 0); 222b411b363SPhilipp Reisner return mdev->ee_hash + 223b411b363SPhilipp Reisner ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); 224b411b363SPhilipp Reisner } 225b411b363SPhilipp Reisner 226b411b363SPhilipp Reisner /* transfer log (drbd_request objects) */ 227b411b363SPhilipp Reisner static inline 228b411b363SPhilipp Reisner struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) 229b411b363SPhilipp Reisner { 230b411b363SPhilipp Reisner BUG_ON(mdev->tl_hash_s == 0); 231b411b363SPhilipp Reisner return mdev->tl_hash + 232b411b363SPhilipp Reisner ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); 233b411b363SPhilipp Reisner } 234b411b363SPhilipp Reisner 235b411b363SPhilipp Reisner /* application reads (drbd_request objects) */ 236b411b363SPhilipp Reisner static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) 237b411b363SPhilipp Reisner { 238b411b363SPhilipp Reisner return mdev->app_reads_hash 239b411b363SPhilipp Reisner + ((unsigned int)(sector) % APP_R_HSIZE); 240b411b363SPhilipp Reisner } 241b411b363SPhilipp Reisner 242b411b363SPhilipp Reisner /* when we receive the answer for a read request, 243b411b363SPhilipp Reisner * verify that we actually know about it */ 244b411b363SPhilipp Reisner static inline struct 
drbd_request *_ar_id_to_req(struct drbd_conf *mdev, 245b411b363SPhilipp Reisner u64 id, sector_t sector) 246b411b363SPhilipp Reisner { 247b411b363SPhilipp Reisner struct hlist_head *slot = ar_hash_slot(mdev, sector); 248b411b363SPhilipp Reisner struct hlist_node *n; 249b411b363SPhilipp Reisner struct drbd_request *req; 250b411b363SPhilipp Reisner 251b411b363SPhilipp Reisner hlist_for_each_entry(req, n, slot, colision) { 252b411b363SPhilipp Reisner if ((unsigned long)req == (unsigned long)id) { 253b411b363SPhilipp Reisner D_ASSERT(req->sector == sector); 254b411b363SPhilipp Reisner return req; 255b411b363SPhilipp Reisner } 256b411b363SPhilipp Reisner } 257b411b363SPhilipp Reisner return NULL; 258b411b363SPhilipp Reisner } 259b411b363SPhilipp Reisner 2605ba82308SPhilipp Reisner static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) 261b411b363SPhilipp Reisner { 262b411b363SPhilipp Reisner struct bio *bio; 263b411b363SPhilipp Reisner bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ 264b411b363SPhilipp Reisner 265b411b363SPhilipp Reisner req->private_bio = bio; 266b411b363SPhilipp Reisner 267b411b363SPhilipp Reisner bio->bi_private = req; 268b411b363SPhilipp Reisner bio->bi_end_io = drbd_endio_pri; 269b411b363SPhilipp Reisner bio->bi_next = NULL; 270b411b363SPhilipp Reisner } 2715ba82308SPhilipp Reisner 2725ba82308SPhilipp Reisner static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, 2735ba82308SPhilipp Reisner struct bio *bio_src) 2745ba82308SPhilipp Reisner { 2755ba82308SPhilipp Reisner struct drbd_request *req = 2765ba82308SPhilipp Reisner mempool_alloc(drbd_request_mempool, GFP_NOIO); 2775ba82308SPhilipp Reisner if (likely(req)) { 2785ba82308SPhilipp Reisner drbd_req_make_private_bio(req, bio_src); 2795ba82308SPhilipp Reisner 2805ba82308SPhilipp Reisner req->rq_state = bio_data_dir(bio_src) == WRITE ? 
RQ_WRITE : 0; 2815ba82308SPhilipp Reisner req->mdev = mdev; 2825ba82308SPhilipp Reisner req->master_bio = bio_src; 2835ba82308SPhilipp Reisner req->epoch = 0; 2845ba82308SPhilipp Reisner req->sector = bio_src->bi_sector; 2855ba82308SPhilipp Reisner req->size = bio_src->bi_size; 2865ba82308SPhilipp Reisner req->start_time = jiffies; 2875ba82308SPhilipp Reisner INIT_HLIST_NODE(&req->colision); 2885ba82308SPhilipp Reisner INIT_LIST_HEAD(&req->tl_requests); 2895ba82308SPhilipp Reisner INIT_LIST_HEAD(&req->w.list); 2905ba82308SPhilipp Reisner } 291b411b363SPhilipp Reisner return req; 292b411b363SPhilipp Reisner } 293b411b363SPhilipp Reisner 294b411b363SPhilipp Reisner static inline void drbd_req_free(struct drbd_request *req) 295b411b363SPhilipp Reisner { 296b411b363SPhilipp Reisner mempool_free(req, drbd_request_mempool); 297b411b363SPhilipp Reisner } 298b411b363SPhilipp Reisner 299b411b363SPhilipp Reisner static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) 300b411b363SPhilipp Reisner { 301b411b363SPhilipp Reisner return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); 302b411b363SPhilipp Reisner } 303b411b363SPhilipp Reisner 304b411b363SPhilipp Reisner /* Short lived temporary struct on the stack. 305b411b363SPhilipp Reisner * We could squirrel the error to be returned into 306b411b363SPhilipp Reisner * bio->bi_size, or similar. But that would be too ugly. 
*/ 307b411b363SPhilipp Reisner struct bio_and_error { 308b411b363SPhilipp Reisner struct bio *bio; 309b411b363SPhilipp Reisner int error; 310b411b363SPhilipp Reisner }; 311b411b363SPhilipp Reisner 312b411b363SPhilipp Reisner extern void _req_may_be_done(struct drbd_request *req, 313b411b363SPhilipp Reisner struct bio_and_error *m); 3142a80699fSPhilipp Reisner extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, 315b411b363SPhilipp Reisner struct bio_and_error *m); 316b411b363SPhilipp Reisner extern void complete_master_bio(struct drbd_conf *mdev, 317b411b363SPhilipp Reisner struct bio_and_error *m); 318b411b363SPhilipp Reisner 319b411b363SPhilipp Reisner /* use this if you don't want to deal with calling complete_master_bio() 320b411b363SPhilipp Reisner * outside the spinlock, e.g. when walking some list on cleanup. */ 3212a80699fSPhilipp Reisner static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) 322b411b363SPhilipp Reisner { 323b411b363SPhilipp Reisner struct drbd_conf *mdev = req->mdev; 324b411b363SPhilipp Reisner struct bio_and_error m; 3252a80699fSPhilipp Reisner int rv; 326b411b363SPhilipp Reisner 327b411b363SPhilipp Reisner /* __req_mod possibly frees req, do not touch req after that! */ 3282a80699fSPhilipp Reisner rv = __req_mod(req, what, &m); 329b411b363SPhilipp Reisner if (m.bio) 330b411b363SPhilipp Reisner complete_master_bio(mdev, &m); 3312a80699fSPhilipp Reisner 3322a80699fSPhilipp Reisner return rv; 333b411b363SPhilipp Reisner } 334b411b363SPhilipp Reisner 335b411b363SPhilipp Reisner /* completion of master bio is outside of spinlock. 336b411b363SPhilipp Reisner * If you need it irqsave, do it your self! 
*/ 3372a80699fSPhilipp Reisner static inline int req_mod(struct drbd_request *req, 338b411b363SPhilipp Reisner enum drbd_req_event what) 339b411b363SPhilipp Reisner { 340b411b363SPhilipp Reisner struct drbd_conf *mdev = req->mdev; 341b411b363SPhilipp Reisner struct bio_and_error m; 3422a80699fSPhilipp Reisner int rv; 3432a80699fSPhilipp Reisner 344b411b363SPhilipp Reisner spin_lock_irq(&mdev->req_lock); 3452a80699fSPhilipp Reisner rv = __req_mod(req, what, &m); 346b411b363SPhilipp Reisner spin_unlock_irq(&mdev->req_lock); 347b411b363SPhilipp Reisner 348b411b363SPhilipp Reisner if (m.bio) 349b411b363SPhilipp Reisner complete_master_bio(mdev, &m); 3502a80699fSPhilipp Reisner 3512a80699fSPhilipp Reisner return rv; 352b411b363SPhilipp Reisner } 353b411b363SPhilipp Reisner #endif 354