1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * dlmglue.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Code which implements an OCFS2 specific interface to our DLM. 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2003, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 34ccd979bdSMark Fasheh 35ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 36ccd979bdSMark Fasheh #include <cluster/masklog.h> 37ccd979bdSMark Fasheh 38ccd979bdSMark Fasheh #include "ocfs2.h" 39d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 40ccd979bdSMark Fasheh 41ccd979bdSMark Fasheh #include "alloc.h" 42d680efe9SMark Fasheh #include "dcache.h" 43ccd979bdSMark Fasheh #include "dlmglue.h" 44ccd979bdSMark Fasheh #include "extent_map.h" 457f1a37e3STiger Yang #include "file.h" 46ccd979bdSMark Fasheh #include "heartbeat.h" 47ccd979bdSMark Fasheh #include "inode.h" 48ccd979bdSMark Fasheh #include "journal.h" 4924ef1815SJoel Becker #include "stackglue.h" 50ccd979bdSMark Fasheh #include "slot_map.h" 51ccd979bdSMark Fasheh #include "super.h" 52ccd979bdSMark Fasheh #include "uptodate.h" 53ccd979bdSMark Fasheh 54ccd979bdSMark Fasheh #include "buffer_head_io.h" 55ccd979bdSMark Fasheh 56ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 57ccd979bdSMark Fasheh struct list_head mw_item; 58ccd979bdSMark Fasheh int mw_status; 59ccd979bdSMark Fasheh struct completion mw_complete; 60ccd979bdSMark Fasheh unsigned long mw_mask; 61ccd979bdSMark Fasheh unsigned long mw_goal; 62ccd979bdSMark Fasheh }; 63ccd979bdSMark Fasheh 6454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 6554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 66cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 67ccd979bdSMark 
Fasheh 68d680efe9SMark Fasheh /* 69cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 70d680efe9SMark Fasheh * 71b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 72d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 73d680efe9SMark Fasheh * 74d680efe9SMark Fasheh */ 75d680efe9SMark Fasheh enum ocfs2_unblock_action { 76d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 77d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 78d680efe9SMark Fasheh * ->post_unlock callback */ 79d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 80d680efe9SMark Fasheh * ->post_unlock() callback. */ 81d680efe9SMark Fasheh }; 82d680efe9SMark Fasheh 83d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 84d680efe9SMark Fasheh int requeue; 85d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 86d680efe9SMark Fasheh }; 87d680efe9SMark Fasheh 88810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 89810d5aebSMark Fasheh int new_level); 90810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 91810d5aebSMark Fasheh 92cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 93cc567d89SMark Fasheh int blocking); 94cc567d89SMark Fasheh 95cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 96cc567d89SMark Fasheh int blocking); 97d680efe9SMark Fasheh 98d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 99d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 100ccd979bdSMark Fasheh 1016cb129f5SAdrian Bunk 1026cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1036cb129f5SAdrian Bunk 1046cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1056cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1066cb129f5SAdrian Bunk const char *function, 1076cb129f5SAdrian Bunk unsigned int line, 1086cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1096cb129f5SAdrian Bunk { 1108f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 1118f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1126cb129f5SAdrian Bunk 1136cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1146cb129f5SAdrian Bunk lockres->l_name, function, line); 1156cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1166cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1176cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1186cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1196cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1206cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1216cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1226cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1236cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1246cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1256cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1266cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1276cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1286cb129f5SAdrian Bunk } 1296cb129f5SAdrian Bunk 1306cb129f5SAdrian Bunk 131f625c979SMark Fasheh /* 132f625c979SMark Fasheh * OCFS2 Lock Resource Operations 133f625c979SMark Fasheh * 134f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 
1350d5dc6c2SMark Fasheh * 1360d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1370d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 1380d5dc6c2SMark Fasheh * 1390d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1400d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1410d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1420d5dc6c2SMark Fasheh * destruction time). 143f625c979SMark Fasheh */ 144ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 14554a7e755SMark Fasheh /* 14654a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 14754a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 14854a7e755SMark Fasheh */ 14954a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 150b5e500e2SMark Fasheh 1510d5dc6c2SMark Fasheh /* 15234d024f8SMark Fasheh * Optionally called in the downconvert thread after a 15334d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 15434d024f8SMark Fasheh * after this callback is called, so it is safe to free 15534d024f8SMark Fasheh * memory, etc. 1560d5dc6c2SMark Fasheh * 1570d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1580d5dc6c2SMark Fasheh * by ->downconvert_worker() 1590d5dc6c2SMark Fasheh */ 160d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 161f625c979SMark Fasheh 162f625c979SMark Fasheh /* 16316d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 16416d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 16516d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 
16616d5b956SMark Fasheh * 16716d5b956SMark Fasheh * For most locks, the default checks that there are no 16816d5b956SMark Fasheh * incompatible holders are sufficient. 16916d5b956SMark Fasheh * 17016d5b956SMark Fasheh * Called with the lockres spinlock held. 17116d5b956SMark Fasheh */ 17216d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 17316d5b956SMark Fasheh 17416d5b956SMark Fasheh /* 1755ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1765ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1775ef0d4eaSMark Fasheh * 1785ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1795ef0d4eaSMark Fasheh * in the flags field. 1805ef0d4eaSMark Fasheh * 1815ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1825ef0d4eaSMark Fasheh */ 1835ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 1845ef0d4eaSMark Fasheh 1855ef0d4eaSMark Fasheh /* 186cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 187cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 188cc567d89SMark Fasheh * any locks held so the function can do work that might 189cc567d89SMark Fasheh * schedule (syncing out data, etc). 190cc567d89SMark Fasheh * 191cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 192cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 193cc567d89SMark Fasheh */ 194cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 195cc567d89SMark Fasheh 196cc567d89SMark Fasheh /* 197f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 198f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 
199f625c979SMark Fasheh */ 200f625c979SMark Fasheh int flags; 201ccd979bdSMark Fasheh }; 202ccd979bdSMark Fasheh 203f625c979SMark Fasheh /* 204f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 205f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. If this 206f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 207f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 208f625c979SMark Fasheh * expected that the locking wrapper will clear the 209f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 210f625c979SMark Fasheh */ 211f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 212f625c979SMark Fasheh 213b80fc012SMark Fasheh /* 2145ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2155ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 216b80fc012SMark Fasheh */ 217b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 218b80fc012SMark Fasheh 219ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 22054a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 221f625c979SMark Fasheh .flags = 0, 222ccd979bdSMark Fasheh }; 223ccd979bdSMark Fasheh 224e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 22554a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 226810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 227810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 228f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 229b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 230ccd979bdSMark Fasheh }; 231ccd979bdSMark Fasheh 232ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 233f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 234ccd979bdSMark Fasheh }; 235ccd979bdSMark Fasheh 236ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops 
ocfs2_rename_lops = { 237f625c979SMark Fasheh .flags = 0, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 24154a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 242d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 243cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 244f625c979SMark Fasheh .flags = 0, 245d680efe9SMark Fasheh }; 246d680efe9SMark Fasheh 24750008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 24850008630STiger Yang .get_osb = ocfs2_get_inode_osb, 24950008630STiger Yang .flags = 0, 25050008630STiger Yang }; 25150008630STiger Yang 252cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 253cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 254cf8e06f1SMark Fasheh .flags = 0, 255cf8e06f1SMark Fasheh }; 256cf8e06f1SMark Fasheh 257ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 258ccd979bdSMark Fasheh { 259ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 26050008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 26150008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 262ccd979bdSMark Fasheh } 263ccd979bdSMark Fasheh 264ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 265ccd979bdSMark Fasheh { 266ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 267ccd979bdSMark Fasheh 268ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 269ccd979bdSMark Fasheh } 270ccd979bdSMark Fasheh 271d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 272d680efe9SMark Fasheh { 273d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 274d680efe9SMark Fasheh 275d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 276d680efe9SMark Fasheh } 277d680efe9SMark Fasheh 27854a7e755SMark 
Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 27954a7e755SMark Fasheh { 28054a7e755SMark Fasheh if (lockres->l_ops->get_osb) 28154a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 28254a7e755SMark Fasheh 28354a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 28454a7e755SMark Fasheh } 28554a7e755SMark Fasheh 286ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 287ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 288ccd979bdSMark Fasheh int level, 289bd3e7610SJoel Becker u32 dlm_flags); 290ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 291ccd979bdSMark Fasheh int wanted); 292ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 293ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 294ccd979bdSMark Fasheh int level); 295ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 296ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 297ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 298ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 299ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 300ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 301ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 302ccd979bdSMark Fasheh int convert); 3037431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 3047431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3057431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 306ccd979bdSMark Fasheh } while (0) 30734d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 30834d024f8SMark 
Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 309ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 310e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 311ccd979bdSMark Fasheh struct buffer_head **bh); 312ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 313ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 314de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 315cf8e06f1SMark Fasheh int new_level); 316cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 317cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 318cf8e06f1SMark Fasheh int new_level, 319de551246SJoel Becker int lvb, 320de551246SJoel Becker unsigned int generation); 321cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 322cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 323cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 324cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 325cf8e06f1SMark Fasheh 326ccd979bdSMark Fasheh 327ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 328ccd979bdSMark Fasheh u64 blkno, 329ccd979bdSMark Fasheh u32 generation, 330ccd979bdSMark Fasheh char *name) 331ccd979bdSMark Fasheh { 332ccd979bdSMark Fasheh int len; 333ccd979bdSMark Fasheh 334ccd979bdSMark Fasheh mlog_entry_void(); 335ccd979bdSMark Fasheh 336ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 337ccd979bdSMark Fasheh 338b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 339b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 340b0697053SMark Fasheh (long long)blkno, generation); 341ccd979bdSMark Fasheh 342ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 343ccd979bdSMark Fasheh 344ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", 
name); 345ccd979bdSMark Fasheh 346ccd979bdSMark Fasheh mlog_exit_void(); 347ccd979bdSMark Fasheh } 348ccd979bdSMark Fasheh 34934af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 350ccd979bdSMark Fasheh 351ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 352ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 353ccd979bdSMark Fasheh { 354ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 355ccd979bdSMark Fasheh 356ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 357ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 358ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 359ccd979bdSMark Fasheh } 360ccd979bdSMark Fasheh 361ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 362ccd979bdSMark Fasheh { 363ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 364ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 365ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 366ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 367ccd979bdSMark Fasheh } 368ccd979bdSMark Fasheh 369ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 370ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 371ccd979bdSMark Fasheh enum ocfs2_lock_type type, 372ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 373ccd979bdSMark Fasheh void *priv) 374ccd979bdSMark Fasheh { 375ccd979bdSMark Fasheh res->l_type = type; 376ccd979bdSMark Fasheh res->l_ops = ops; 377ccd979bdSMark Fasheh res->l_priv = priv; 378ccd979bdSMark Fasheh 379bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 380bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 381bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 382ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 383ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 384ccd979bdSMark Fasheh 385ccd979bdSMark Fasheh res->l_flags = 
OCFS2_LOCK_INITIALIZED; 386ccd979bdSMark Fasheh 387ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 388ccd979bdSMark Fasheh } 389ccd979bdSMark Fasheh 390ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 391ccd979bdSMark Fasheh { 392ccd979bdSMark Fasheh /* This also clears out the lock status block */ 393ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 394ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 395ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 396ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 397ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 398ccd979bdSMark Fasheh } 399ccd979bdSMark Fasheh 400ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 401ccd979bdSMark Fasheh enum ocfs2_lock_type type, 40224c19ef4SMark Fasheh unsigned int generation, 403ccd979bdSMark Fasheh struct inode *inode) 404ccd979bdSMark Fasheh { 405ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 406ccd979bdSMark Fasheh 407ccd979bdSMark Fasheh switch(type) { 408ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 409ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 410ccd979bdSMark Fasheh break; 411ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 412e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 413ccd979bdSMark Fasheh break; 41450008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 41550008630STiger Yang ops = &ocfs2_inode_open_lops; 41650008630STiger Yang break; 417ccd979bdSMark Fasheh default: 418ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 419ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 420ccd979bdSMark Fasheh break; 421ccd979bdSMark Fasheh }; 422ccd979bdSMark Fasheh 423d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 42424c19ef4SMark Fasheh generation, res->l_name); 425d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 426d680efe9SMark Fasheh } 427d680efe9SMark Fasheh 
42854a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 42954a7e755SMark Fasheh { 43054a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 43154a7e755SMark Fasheh 43254a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 43354a7e755SMark Fasheh } 43454a7e755SMark Fasheh 435cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 436cf8e06f1SMark Fasheh { 437cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 438cf8e06f1SMark Fasheh 439cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 440cf8e06f1SMark Fasheh } 441cf8e06f1SMark Fasheh 442d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 443d680efe9SMark Fasheh { 444d680efe9SMark Fasheh __be64 inode_blkno_be; 445d680efe9SMark Fasheh 446d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 447d680efe9SMark Fasheh sizeof(__be64)); 448d680efe9SMark Fasheh 449d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 450d680efe9SMark Fasheh } 451d680efe9SMark Fasheh 45254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 45354a7e755SMark Fasheh { 45454a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 45554a7e755SMark Fasheh 45654a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 45754a7e755SMark Fasheh } 45854a7e755SMark Fasheh 459d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 460d680efe9SMark Fasheh u64 parent, struct inode *inode) 461d680efe9SMark Fasheh { 462d680efe9SMark Fasheh int len; 463d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 464d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 465d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 466d680efe9SMark Fasheh 467d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 
468d680efe9SMark Fasheh 469d680efe9SMark Fasheh /* 470d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 471d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 472d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 473d680efe9SMark Fasheh * want error prints to show something without garbling the 474d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 475d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 476d680efe9SMark Fasheh * binary lock names. The stringified names have been a 477d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 478d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 479d680efe9SMark Fasheh * 480d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 481d680efe9SMark Fasheh * name size stays the same though - the last part is all 482d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 483d680efe9SMark Fasheh */ 484d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 485d680efe9SMark Fasheh "%c%016llx", 486d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 487d680efe9SMark Fasheh (long long)parent); 488d680efe9SMark Fasheh 489d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 490d680efe9SMark Fasheh 491d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 492d680efe9SMark Fasheh sizeof(__be64)); 493d680efe9SMark Fasheh 494d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 495d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 496d680efe9SMark Fasheh dl); 497ccd979bdSMark Fasheh } 498ccd979bdSMark Fasheh 499ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 500ccd979bdSMark Fasheh struct ocfs2_super *osb) 501ccd979bdSMark Fasheh { 
502ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 503ccd979bdSMark Fasheh * once on it manually. */ 504ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 505d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 506d680efe9SMark Fasheh 0, res->l_name); 507ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 508ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 509ccd979bdSMark Fasheh } 510ccd979bdSMark Fasheh 511ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 512ccd979bdSMark Fasheh struct ocfs2_super *osb) 513ccd979bdSMark Fasheh { 514ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 515ccd979bdSMark Fasheh * once on it manually. */ 516ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 517d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 518d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 519ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 520ccd979bdSMark Fasheh } 521ccd979bdSMark Fasheh 522cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 523cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 524cf8e06f1SMark Fasheh { 525cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 526cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 527cf8e06f1SMark Fasheh 528cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 529cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 530cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 531cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 532cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 533cf8e06f1SMark Fasheh fp); 534cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 535cf8e06f1SMark Fasheh } 536cf8e06f1SMark Fasheh 537ccd979bdSMark Fasheh 
void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 538ccd979bdSMark Fasheh { 539ccd979bdSMark Fasheh mlog_entry_void(); 540ccd979bdSMark Fasheh 541ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 542ccd979bdSMark Fasheh return; 543ccd979bdSMark Fasheh 544ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 545ccd979bdSMark Fasheh 546ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 547ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 548ccd979bdSMark Fasheh res->l_name); 549ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 550ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 551ccd979bdSMark Fasheh res->l_name); 552ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 553ccd979bdSMark Fasheh "Lockres %s is locked\n", 554ccd979bdSMark Fasheh res->l_name); 555ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 556ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 557ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 558ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 559ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 560ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 561ccd979bdSMark Fasheh 562ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 563ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 564ccd979bdSMark Fasheh 565ccd979bdSMark Fasheh res->l_flags = 0UL; 566ccd979bdSMark Fasheh mlog_exit_void(); 567ccd979bdSMark Fasheh } 568ccd979bdSMark Fasheh 569ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 570ccd979bdSMark Fasheh int level) 571ccd979bdSMark Fasheh { 572ccd979bdSMark Fasheh mlog_entry_void(); 573ccd979bdSMark Fasheh 574ccd979bdSMark Fasheh BUG_ON(!lockres); 575ccd979bdSMark Fasheh 576ccd979bdSMark Fasheh switch(level) { 577bd3e7610SJoel Becker case DLM_LOCK_EX: 578ccd979bdSMark Fasheh lockres->l_ex_holders++; 579ccd979bdSMark Fasheh 
break; 580bd3e7610SJoel Becker case DLM_LOCK_PR: 581ccd979bdSMark Fasheh lockres->l_ro_holders++; 582ccd979bdSMark Fasheh break; 583ccd979bdSMark Fasheh default: 584ccd979bdSMark Fasheh BUG(); 585ccd979bdSMark Fasheh } 586ccd979bdSMark Fasheh 587ccd979bdSMark Fasheh mlog_exit_void(); 588ccd979bdSMark Fasheh } 589ccd979bdSMark Fasheh 590ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 591ccd979bdSMark Fasheh int level) 592ccd979bdSMark Fasheh { 593ccd979bdSMark Fasheh mlog_entry_void(); 594ccd979bdSMark Fasheh 595ccd979bdSMark Fasheh BUG_ON(!lockres); 596ccd979bdSMark Fasheh 597ccd979bdSMark Fasheh switch(level) { 598bd3e7610SJoel Becker case DLM_LOCK_EX: 599ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 600ccd979bdSMark Fasheh lockres->l_ex_holders--; 601ccd979bdSMark Fasheh break; 602bd3e7610SJoel Becker case DLM_LOCK_PR: 603ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 604ccd979bdSMark Fasheh lockres->l_ro_holders--; 605ccd979bdSMark Fasheh break; 606ccd979bdSMark Fasheh default: 607ccd979bdSMark Fasheh BUG(); 608ccd979bdSMark Fasheh } 609ccd979bdSMark Fasheh mlog_exit_void(); 610ccd979bdSMark Fasheh } 611ccd979bdSMark Fasheh 612ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 613ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 614ccd979bdSMark Fasheh * lock types are added. 
*/ 615ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 616ccd979bdSMark Fasheh { 617bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 618ccd979bdSMark Fasheh 619bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 620bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 621bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 622bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 623ccd979bdSMark Fasheh return new_level; 624ccd979bdSMark Fasheh } 625ccd979bdSMark Fasheh 626ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 627ccd979bdSMark Fasheh unsigned long newflags) 628ccd979bdSMark Fasheh { 629800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 630ccd979bdSMark Fasheh 631ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 632ccd979bdSMark Fasheh 633ccd979bdSMark Fasheh lockres->l_flags = newflags; 634ccd979bdSMark Fasheh 635800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 636ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 637ccd979bdSMark Fasheh continue; 638ccd979bdSMark Fasheh 639ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 640ccd979bdSMark Fasheh mw->mw_status = 0; 641ccd979bdSMark Fasheh complete(&mw->mw_complete); 642ccd979bdSMark Fasheh } 643ccd979bdSMark Fasheh } 644ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 645ccd979bdSMark Fasheh { 646ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 647ccd979bdSMark Fasheh } 648ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 649ccd979bdSMark Fasheh unsigned long clear) 650ccd979bdSMark Fasheh { 651ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 652ccd979bdSMark Fasheh } 653ccd979bdSMark Fasheh 654ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 655ccd979bdSMark Fasheh { 
656ccd979bdSMark Fasheh mlog_entry_void(); 657ccd979bdSMark Fasheh 658ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 659ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 660ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 661bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 662ccd979bdSMark Fasheh 663ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 664ccd979bdSMark Fasheh if (lockres->l_level <= 665ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 666bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 667ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 668ccd979bdSMark Fasheh } 669ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 670ccd979bdSMark Fasheh 671ccd979bdSMark Fasheh mlog_exit_void(); 672ccd979bdSMark Fasheh } 673ccd979bdSMark Fasheh 674ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 675ccd979bdSMark Fasheh { 676ccd979bdSMark Fasheh mlog_entry_void(); 677ccd979bdSMark Fasheh 678ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 679ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 680ccd979bdSMark Fasheh 681ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 682ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 683ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 684ccd979bdSMark Fasheh * update */ 685bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 686f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 687ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 688ccd979bdSMark Fasheh 689ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 690ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 691ccd979bdSMark Fasheh 692ccd979bdSMark Fasheh mlog_exit_void(); 693ccd979bdSMark Fasheh } 694ccd979bdSMark Fasheh 695ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 696ccd979bdSMark Fasheh { 697ccd979bdSMark Fasheh mlog_entry_void(); 698ccd979bdSMark Fasheh 6993cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 700ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 701ccd979bdSMark Fasheh 702bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 703f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 704f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 705ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 706ccd979bdSMark Fasheh 707ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 708ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 709ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 710ccd979bdSMark Fasheh 711ccd979bdSMark Fasheh mlog_exit_void(); 712ccd979bdSMark Fasheh } 713ccd979bdSMark Fasheh 714ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 715ccd979bdSMark Fasheh int level) 716ccd979bdSMark Fasheh { 717ccd979bdSMark Fasheh int needs_downconvert = 0; 718ccd979bdSMark Fasheh mlog_entry_void(); 719ccd979bdSMark Fasheh 720ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 721ccd979bdSMark Fasheh 
722ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 723ccd979bdSMark Fasheh 724ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 725ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 726ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 727ccd979bdSMark Fasheh * blocking. this also catches the case where we get 728ccd979bdSMark Fasheh * duplicate BASTs */ 729ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 730ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 731ccd979bdSMark Fasheh needs_downconvert = 1; 732ccd979bdSMark Fasheh 733ccd979bdSMark Fasheh lockres->l_blocking = level; 734ccd979bdSMark Fasheh } 735ccd979bdSMark Fasheh 736ccd979bdSMark Fasheh mlog_exit(needs_downconvert); 737ccd979bdSMark Fasheh return needs_downconvert; 738ccd979bdSMark Fasheh } 739ccd979bdSMark Fasheh 740de551246SJoel Becker /* 741de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 742de551246SJoel Becker * 743de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 744de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 745de551246SJoel Becker * for more details on the race. 746de551246SJoel Becker * 747de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 748de551246SJoel Becker * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 749de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 750de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 751de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 752de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 753de551246SJoel Becker * nothing. 
754de551246SJoel Becker * 755de551246SJoel Becker * But what if another path (eg downconvert thread) has just started a 756de551246SJoel Becker * new locking action? The other path has re-set PENDING. Our path 757de551246SJoel Becker * cannot clear PENDING, because that will re-open the original race 758de551246SJoel Becker * window. 759de551246SJoel Becker * 760de551246SJoel Becker * [Example] 761de551246SJoel Becker * 762de551246SJoel Becker * ocfs2_meta_lock() 763de551246SJoel Becker * ocfs2_cluster_lock() 764de551246SJoel Becker * set BUSY 765de551246SJoel Becker * set PENDING 766de551246SJoel Becker * drop l_lock 767de551246SJoel Becker * ocfs2_dlm_lock() 768de551246SJoel Becker * ocfs2_locking_ast() ocfs2_downconvert_thread() 769de551246SJoel Becker * clear PENDING ocfs2_unblock_lock() 770de551246SJoel Becker * take_l_lock 771de551246SJoel Becker * !BUSY 772de551246SJoel Becker * ocfs2_prepare_downconvert() 773de551246SJoel Becker * set BUSY 774de551246SJoel Becker * set PENDING 775de551246SJoel Becker * drop l_lock 776de551246SJoel Becker * take l_lock 777de551246SJoel Becker * clear PENDING 778de551246SJoel Becker * drop l_lock 779de551246SJoel Becker * <window> 780de551246SJoel Becker * ocfs2_dlm_lock() 781de551246SJoel Becker * 782de551246SJoel Becker * So as you can see, we now have a window where l_lock is not held, 783de551246SJoel Becker * PENDING is not set, and ocfs2_dlm_lock() has not been called. 784de551246SJoel Becker * 785de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 786de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 787de551246SJoel Becker * 788de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 789de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 790de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 791de551246SJoel Becker * current generation number. 
When ocfs2_cluster_lock() goes to clear 792de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 793de551246SJoel Becker * example above, the generation numbers will *not* match. Thus, 794de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 795de551246SJoel Becker * ocfs2_prepare_downconvert(). 796de551246SJoel Becker */ 797de551246SJoel Becker 798de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 799de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 800de551246SJoel Becker unsigned int generation, 801de551246SJoel Becker struct ocfs2_super *osb) 802de551246SJoel Becker { 803de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 804de551246SJoel Becker 805de551246SJoel Becker /* 806de551246SJoel Becker * The ast and locking functions can race us here. The winner 807de551246SJoel Becker * will clear pending, the loser will not. 808de551246SJoel Becker */ 809de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 810de551246SJoel Becker (lockres->l_pending_gen != generation)) 811de551246SJoel Becker return; 812de551246SJoel Becker 813de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 814de551246SJoel Becker lockres->l_pending_gen++; 815de551246SJoel Becker 816de551246SJoel Becker /* 817de551246SJoel Becker * The downconvert thread may have skipped us because we 818de551246SJoel Becker * were PENDING. Wake it up. 
819de551246SJoel Becker */ 820de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 821de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 822de551246SJoel Becker } 823de551246SJoel Becker 824de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 825de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 826de551246SJoel Becker unsigned int generation, 827de551246SJoel Becker struct ocfs2_super *osb) 828de551246SJoel Becker { 829de551246SJoel Becker unsigned long flags; 830de551246SJoel Becker 831de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 832de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 833de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 834de551246SJoel Becker } 835de551246SJoel Becker 836de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 837de551246SJoel Becker { 838de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 839de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 840de551246SJoel Becker 841de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 842de551246SJoel Becker 843de551246SJoel Becker return lockres->l_pending_gen; 844de551246SJoel Becker } 845de551246SJoel Becker 846de551246SJoel Becker 847aa2623adSMark Fasheh static void ocfs2_blocking_ast(void *opaque, int level) 848ccd979bdSMark Fasheh { 849aa2623adSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 850aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 851ccd979bdSMark Fasheh int needs_downconvert; 852ccd979bdSMark Fasheh unsigned long flags; 853ccd979bdSMark Fasheh 854bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 855ccd979bdSMark Fasheh 856aa2623adSMark Fasheh mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 857aa2623adSMark Fasheh lockres->l_name, level, lockres->l_level, 858aa2623adSMark Fasheh 
ocfs2_lock_type_string(lockres->l_type)); 859aa2623adSMark Fasheh 860cf8e06f1SMark Fasheh /* 861cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 862cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 863cf8e06f1SMark Fasheh */ 864cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 865cf8e06f1SMark Fasheh return; 866cf8e06f1SMark Fasheh 867ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 868ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 869ccd979bdSMark Fasheh if (needs_downconvert) 870ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 871ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 872ccd979bdSMark Fasheh 873d680efe9SMark Fasheh wake_up(&lockres->l_event); 874d680efe9SMark Fasheh 87534d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 876ccd979bdSMark Fasheh } 877ccd979bdSMark Fasheh 878e92d57dfSMark Fasheh static void ocfs2_locking_ast(void *opaque) 879ccd979bdSMark Fasheh { 880e92d57dfSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 881de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 882ccd979bdSMark Fasheh unsigned long flags; 8831693a5c0SDavid Teigland int status; 884ccd979bdSMark Fasheh 885ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 886ccd979bdSMark Fasheh 8871693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 8881693a5c0SDavid Teigland 8891693a5c0SDavid Teigland if (status == -EAGAIN) { 8901693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 8911693a5c0SDavid Teigland goto out; 8921693a5c0SDavid Teigland } 8931693a5c0SDavid Teigland 8941693a5c0SDavid Teigland if (status) { 8958f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 8961693a5c0SDavid Teigland lockres->l_name, status); 897ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 898ccd979bdSMark Fasheh 
return; 899ccd979bdSMark Fasheh } 900ccd979bdSMark Fasheh 901ccd979bdSMark Fasheh switch(lockres->l_action) { 902ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 903ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 904e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 905ccd979bdSMark Fasheh break; 906ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 907ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 908ccd979bdSMark Fasheh break; 909ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 910ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 911ccd979bdSMark Fasheh break; 912ccd979bdSMark Fasheh default: 913e92d57dfSMark Fasheh mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 914e92d57dfSMark Fasheh "lockres flags = 0x%lx, unlock action: %u\n", 915e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 916e92d57dfSMark Fasheh lockres->l_unlock_action); 917ccd979bdSMark Fasheh BUG(); 918ccd979bdSMark Fasheh } 9191693a5c0SDavid Teigland out: 920ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 921ccd979bdSMark Fasheh * can catch it. */ 922ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 923ccd979bdSMark Fasheh 924de551246SJoel Becker /* Did we try to cancel this lock? Clear that state */ 925de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 926de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 927de551246SJoel Becker 928de551246SJoel Becker /* 929de551246SJoel Becker * We may have beaten the locking functions here. We certainly 930de551246SJoel Becker * know that dlm_lock() has been called :-) 931de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 932de551246SJoel Becker * can use lockres->l_pending_gen. 
933de551246SJoel Becker */ 934de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 935de551246SJoel Becker 936ccd979bdSMark Fasheh wake_up(&lockres->l_event); 937d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 938ccd979bdSMark Fasheh } 939ccd979bdSMark Fasheh 940ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 941ccd979bdSMark Fasheh int convert) 942ccd979bdSMark Fasheh { 943ccd979bdSMark Fasheh unsigned long flags; 944ccd979bdSMark Fasheh 945ccd979bdSMark Fasheh mlog_entry_void(); 946ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 947ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 948ccd979bdSMark Fasheh if (convert) 949ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 950ccd979bdSMark Fasheh else 951ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 952ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 953ccd979bdSMark Fasheh 954ccd979bdSMark Fasheh wake_up(&lockres->l_event); 955ccd979bdSMark Fasheh mlog_exit_void(); 956ccd979bdSMark Fasheh } 957ccd979bdSMark Fasheh 958ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 959ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 960ccd979bdSMark Fasheh * to do the right thing in that case. 
961ccd979bdSMark Fasheh */ 962ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 963ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 964ccd979bdSMark Fasheh int level, 965bd3e7610SJoel Becker u32 dlm_flags) 966ccd979bdSMark Fasheh { 967ccd979bdSMark Fasheh int ret = 0; 968ccd979bdSMark Fasheh unsigned long flags; 969de551246SJoel Becker unsigned int gen; 970ccd979bdSMark Fasheh 971ccd979bdSMark Fasheh mlog_entry_void(); 972ccd979bdSMark Fasheh 973bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 974ccd979bdSMark Fasheh dlm_flags); 975ccd979bdSMark Fasheh 976ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 977ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 978ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 979ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 980ccd979bdSMark Fasheh goto bail; 981ccd979bdSMark Fasheh } 982ccd979bdSMark Fasheh 983ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 984ccd979bdSMark Fasheh lockres->l_requested = level; 985ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 986de551246SJoel Becker gen = lockres_set_pending(lockres); 987ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 988ccd979bdSMark Fasheh 9894670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 990ccd979bdSMark Fasheh level, 991ccd979bdSMark Fasheh &lockres->l_lksb, 992ccd979bdSMark Fasheh dlm_flags, 993ccd979bdSMark Fasheh lockres->l_name, 994f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 99524ef1815SJoel Becker lockres); 996de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 9977431cd7eSJoel Becker if (ret) { 9987431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 999ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1000ccd979bdSMark Fasheh } 1001ccd979bdSMark Fasheh 10027431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", 
lockres->l_name); 1003ccd979bdSMark Fasheh 1004ccd979bdSMark Fasheh bail: 1005ccd979bdSMark Fasheh mlog_exit(ret); 1006ccd979bdSMark Fasheh return ret; 1007ccd979bdSMark Fasheh } 1008ccd979bdSMark Fasheh 1009ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1010ccd979bdSMark Fasheh int flag) 1011ccd979bdSMark Fasheh { 1012ccd979bdSMark Fasheh unsigned long flags; 1013ccd979bdSMark Fasheh int ret; 1014ccd979bdSMark Fasheh 1015ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1016ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1017ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1018ccd979bdSMark Fasheh 1019ccd979bdSMark Fasheh return ret; 1020ccd979bdSMark Fasheh } 1021ccd979bdSMark Fasheh 1022ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1023ccd979bdSMark Fasheh 1024ccd979bdSMark Fasheh { 1025ccd979bdSMark Fasheh wait_event(lockres->l_event, 1026ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1027ccd979bdSMark Fasheh } 1028ccd979bdSMark Fasheh 1029ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1030ccd979bdSMark Fasheh 1031ccd979bdSMark Fasheh { 1032ccd979bdSMark Fasheh wait_event(lockres->l_event, 1033ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1034ccd979bdSMark Fasheh } 1035ccd979bdSMark Fasheh 1036ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1037ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1038ccd979bdSMark Fasheh * level will be compatible with it. 
 */
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted)
{
	/* Only meaningful while the lock is actually blocked. */
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));

	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
}

/* Put a mask waiter into its idle state: off any list, completion armed. */
static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
{
	INIT_LIST_HEAD(&mw->mw_item);
	init_completion(&mw->mw_complete);
}

/*
 * Block (uninterruptibly) until the waiter is completed, then return
 * its mw_status.
 */
static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
	wait_for_completion(&mw->mw_complete);
	/* Re-arm the completion in case we want to wait on it again */
	INIT_COMPLETION(mw->mw_complete);
	return mw->mw_status;
}

/*
 * Queue @mw on the lockres so that it is completed once
 * (l_flags & @mask) == @goal (see lockres_set_flags()).  The caller
 * must hold l_lock and @mw must not already be queued.
 */
static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
				    struct ocfs2_mask_waiter *mw,
				    unsigned long mask,
				    unsigned long goal)
{
	BUG_ON(!list_empty(&mw->mw_item));

	assert_spin_locked(&lockres->l_lock);

	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
	mw->mw_mask = mask;
	mw->mw_goal = goal;
}

/* returns 0 if the mw that was removed was already satisfied, -EBUSY
 * if the mask still hadn't reached its goal */
static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
				      struct ocfs2_mask_waiter *mw)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&lockres->l_lock, flags);
	/* An empty mw_item means the waiter is not queued (e.g. it was
	 * already completed and unhooked); nothing to undo then. */
	if (!list_empty(&mw->mw_item)) {
		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
			ret = -EBUSY;

		list_del_init(&mw->mw_item);
		init_completion(&mw->mw_complete);
	}
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	return ret;

}

/*
 * Interruptible variant of ocfs2_wait_for_mask().  If the wait returns
 * non-zero the waiter is taken off @lockres and that value is handed
 * back; otherwise mw_status is returned.
 */
static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
					     struct ocfs2_lock_res *lockres)
{
	int ret;

	ret = wait_for_completion_interruptible(&mw->mw_complete);
	if (ret)
		lockres_remove_mask_waiter(lockres, mw);
	else
		ret = mw->mw_status;
	/* Re-arm the completion in case we want to wait on it again */
	INIT_COMPLETION(mw->mw_complete);
	return ret;
}

/*
 * Take a hold on @lockres at @level, asking the dlm for an attach or
 * upconvert first when the currently granted level is too low.
 *
 * @lkm_flags are passed on to ocfs2_dlm_lock(); DLM_LKF_VALBLK is added
 * for lock types with LOCK_TYPE_USES_LVB, and DLM_LKF_CONVERT is set or
 * cleared here depending on whether the lock is already attached.
 * @arg_flags may carry OCFS2_LOCK_NONBLOCK, in which case the function
 * backs out instead of sleeping on a busy or blocked lock.
 *
 * Returns 0 once the holder count for @level has been bumped.  Errors
 * visible in this function: -ERESTARTSYS when a signal is pending and
 * signals are being caught, -EAGAIN when a DLM_LKF_NOQUEUE request has
 * already been tried once or when OCFS2_LOCK_NONBLOCK prevented a wait,
 * or whatever non-zero value ocfs2_dlm_lock() returned.
 */
static int ocfs2_cluster_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      int level,
			      u32 lkm_flags,
			      int arg_flags)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;

	mlog_entry_void();

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

	/* Every pass through here re-evaluates the lock state from scratch. */
again:
	wait = 0;

	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto out;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* is the lock currently blocked on behalf of
		 * another node? If so, wait for BLOCKED to clear. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request gets exactly one trip into the dlm;
		 * if we are back here afterwards it was not granted. */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* First request on this lockres is an attach, anything
		 * later is a convert of the existing lock. */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		/* PENDING covers the window between dropping l_lock and
		 * the dlm call; gen identifies our PENDING instance. */
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(0, "lock %s, convert from %d to level = %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1,
				     lockres);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			/* -EAGAIN on a NOQUEUE request is an expected
			 * outcome and is not logged as an error. */
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			goto out;
		}

		mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks. One path holds the page lock while calling aops
	 * which block acquiring dlm locks. The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		/* Non-zero means the goal was still unmet when the
		 * waiter was pulled: report -EAGAIN instead of
		 * sleeping.  Zero means there is no need to wait any
		 * more, so simply retry. */
		if (lockres_remove_mask_waiter(lockres, &mw))
			ret = -EAGAIN;
		else
			goto again;
	}
	if (wait) {
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}

	mlog_exit(ret);
	return ret;
}

static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
				 struct ocfs2_lock_res *lockres,
				 int level)
{
	unsigned long flags;

	mlog_entry_void();

spin_lock_irqsave(&lockres->l_lock, flags); 1270ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 127134d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1272ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1273ccd979bdSMark Fasheh mlog_exit_void(); 1274ccd979bdSMark Fasheh } 1275ccd979bdSMark Fasheh 1276da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1277d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 127824c19ef4SMark Fasheh int ex, 127924c19ef4SMark Fasheh int local) 1280ccd979bdSMark Fasheh { 1281bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1282ccd979bdSMark Fasheh unsigned long flags; 1283bd3e7610SJoel Becker u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1284ccd979bdSMark Fasheh 1285ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1286ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1287ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1288ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1289ccd979bdSMark Fasheh 129024c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1291ccd979bdSMark Fasheh } 1292ccd979bdSMark Fasheh 1293ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1294ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1295ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1296ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1297ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1298ccd979bdSMark Fasheh * with creating a new lock resource. 
*/ 1299ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1300ccd979bdSMark Fasheh { 1301ccd979bdSMark Fasheh int ret; 1302d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1303ccd979bdSMark Fasheh 1304ccd979bdSMark Fasheh BUG_ON(!inode); 1305ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1306ccd979bdSMark Fasheh 1307ccd979bdSMark Fasheh mlog_entry_void(); 1308ccd979bdSMark Fasheh 1309b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1310ccd979bdSMark Fasheh 1311ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1312ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1313ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1314ccd979bdSMark Fasheh * about yet, there is no need to. As far as the LVB handling 1315ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1316ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1317ccd979bdSMark Fasheh * valid when we release the EX. */ 1318ccd979bdSMark Fasheh 131924c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1320ccd979bdSMark Fasheh if (ret) { 1321ccd979bdSMark Fasheh mlog_errno(ret); 1322ccd979bdSMark Fasheh goto bail; 1323ccd979bdSMark Fasheh } 1324ccd979bdSMark Fasheh 132524c19ef4SMark Fasheh /* 1326bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 132724c19ef4SMark Fasheh * don't use a generation in their lock names. 
132824c19ef4SMark Fasheh */ 1329e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1330ccd979bdSMark Fasheh if (ret) { 1331ccd979bdSMark Fasheh mlog_errno(ret); 1332ccd979bdSMark Fasheh goto bail; 1333ccd979bdSMark Fasheh } 1334ccd979bdSMark Fasheh 133550008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 133650008630STiger Yang if (ret) { 133750008630STiger Yang mlog_errno(ret); 133850008630STiger Yang goto bail; 133950008630STiger Yang } 134050008630STiger Yang 1341ccd979bdSMark Fasheh bail: 1342ccd979bdSMark Fasheh mlog_exit(ret); 1343ccd979bdSMark Fasheh return ret; 1344ccd979bdSMark Fasheh } 1345ccd979bdSMark Fasheh 1346ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1347ccd979bdSMark Fasheh { 1348ccd979bdSMark Fasheh int status, level; 1349ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1350c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1351ccd979bdSMark Fasheh 1352ccd979bdSMark Fasheh BUG_ON(!inode); 1353ccd979bdSMark Fasheh 1354ccd979bdSMark Fasheh mlog_entry_void(); 1355ccd979bdSMark Fasheh 1356b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1357b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1358ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1359ccd979bdSMark Fasheh 1360c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1361c271c5c2SSunil Mushran return 0; 1362c271c5c2SSunil Mushran 1363ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1364ccd979bdSMark Fasheh 1365bd3e7610SJoel Becker level = write ? 
DLM_LOCK_EX : DLM_LOCK_PR; 1366ccd979bdSMark Fasheh 1367ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1368ccd979bdSMark Fasheh 0); 1369ccd979bdSMark Fasheh if (status < 0) 1370ccd979bdSMark Fasheh mlog_errno(status); 1371ccd979bdSMark Fasheh 1372ccd979bdSMark Fasheh mlog_exit(status); 1373ccd979bdSMark Fasheh return status; 1374ccd979bdSMark Fasheh } 1375ccd979bdSMark Fasheh 1376ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1377ccd979bdSMark Fasheh { 1378bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1379ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1380c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1381ccd979bdSMark Fasheh 1382ccd979bdSMark Fasheh mlog_entry_void(); 1383ccd979bdSMark Fasheh 1384b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1385b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1386ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1387ccd979bdSMark Fasheh 1388c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1389ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1390ccd979bdSMark Fasheh 1391ccd979bdSMark Fasheh mlog_exit_void(); 1392ccd979bdSMark Fasheh } 1393ccd979bdSMark Fasheh 139450008630STiger Yang /* 139550008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
139650008630STiger Yang */ 139750008630STiger Yang int ocfs2_open_lock(struct inode *inode) 139850008630STiger Yang { 139950008630STiger Yang int status = 0; 140050008630STiger Yang struct ocfs2_lock_res *lockres; 140150008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 140250008630STiger Yang 140350008630STiger Yang BUG_ON(!inode); 140450008630STiger Yang 140550008630STiger Yang mlog_entry_void(); 140650008630STiger Yang 140750008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 140850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 140950008630STiger Yang 141050008630STiger Yang if (ocfs2_mount_local(osb)) 141150008630STiger Yang goto out; 141250008630STiger Yang 141350008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 141450008630STiger Yang 141550008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1416bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 141750008630STiger Yang if (status < 0) 141850008630STiger Yang mlog_errno(status); 141950008630STiger Yang 142050008630STiger Yang out: 142150008630STiger Yang mlog_exit(status); 142250008630STiger Yang return status; 142350008630STiger Yang } 142450008630STiger Yang 142550008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 142650008630STiger Yang { 142750008630STiger Yang int status = 0, level; 142850008630STiger Yang struct ocfs2_lock_res *lockres; 142950008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 143050008630STiger Yang 143150008630STiger Yang BUG_ON(!inode); 143250008630STiger Yang 143350008630STiger Yang mlog_entry_void(); 143450008630STiger Yang 143550008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 143650008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 143750008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 143850008630STiger Yang 143950008630STiger Yang if (ocfs2_mount_local(osb)) 144050008630STiger Yang goto out; 144150008630STiger Yang 144250008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 144350008630STiger Yang 1444bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 144550008630STiger Yang 144650008630STiger Yang /* 144750008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1448bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 144950008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 145050008630STiger Yang * this inode is still in use. 145150008630STiger Yang */ 145250008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1453bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 145450008630STiger Yang 145550008630STiger Yang out: 145650008630STiger Yang mlog_exit(status); 145750008630STiger Yang return status; 145850008630STiger Yang } 145950008630STiger Yang 146050008630STiger Yang /* 146150008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
146250008630STiger Yang */ 146350008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 146450008630STiger Yang { 146550008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 146650008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 146750008630STiger Yang 146850008630STiger Yang mlog_entry_void(); 146950008630STiger Yang 147050008630STiger Yang mlog(0, "inode %llu drop open lock\n", 147150008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 147250008630STiger Yang 147350008630STiger Yang if (ocfs2_mount_local(osb)) 147450008630STiger Yang goto out; 147550008630STiger Yang 147650008630STiger Yang if(lockres->l_ro_holders) 147750008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1478bd3e7610SJoel Becker DLM_LOCK_PR); 147950008630STiger Yang if(lockres->l_ex_holders) 148050008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1481bd3e7610SJoel Becker DLM_LOCK_EX); 148250008630STiger Yang 148350008630STiger Yang out: 148450008630STiger Yang mlog_exit_void(); 148550008630STiger Yang } 148650008630STiger Yang 1487cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1488cf8e06f1SMark Fasheh int level) 1489cf8e06f1SMark Fasheh { 1490cf8e06f1SMark Fasheh int ret; 1491cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1492cf8e06f1SMark Fasheh unsigned long flags; 1493cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1494cf8e06f1SMark Fasheh 1495cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1496cf8e06f1SMark Fasheh 1497cf8e06f1SMark Fasheh retry_cancel: 1498cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1499cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1500cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1501cf8e06f1SMark Fasheh if (ret) { 1502cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1503cf8e06f1SMark Fasheh ret = 
ocfs2_cancel_convert(osb, lockres); 1504cf8e06f1SMark Fasheh if (ret < 0) { 1505cf8e06f1SMark Fasheh mlog_errno(ret); 1506cf8e06f1SMark Fasheh goto out; 1507cf8e06f1SMark Fasheh } 1508cf8e06f1SMark Fasheh goto retry_cancel; 1509cf8e06f1SMark Fasheh } 1510cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1511cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1512cf8e06f1SMark Fasheh 1513cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1514cf8e06f1SMark Fasheh goto retry_cancel; 1515cf8e06f1SMark Fasheh } 1516cf8e06f1SMark Fasheh 1517cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1518cf8e06f1SMark Fasheh /* 1519cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1520cf8e06f1SMark Fasheh * point to restarting the syscall. 1521cf8e06f1SMark Fasheh */ 1522cf8e06f1SMark Fasheh if (lockres->l_level == level) 1523cf8e06f1SMark Fasheh ret = 0; 1524cf8e06f1SMark Fasheh 1525cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1526cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1527cf8e06f1SMark Fasheh 1528cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1529cf8e06f1SMark Fasheh 1530cf8e06f1SMark Fasheh out: 1531cf8e06f1SMark Fasheh return ret; 1532cf8e06f1SMark Fasheh } 1533cf8e06f1SMark Fasheh 1534cf8e06f1SMark Fasheh /* 1535cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1536cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1537cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 1538cf8e06f1SMark Fasheh * seperate path to the "low-level" dlm calls. In particular: 1539cf8e06f1SMark Fasheh * 1540cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1541cf8e06f1SMark Fasheh * what's been requested. 
1542cf8e06f1SMark Fasheh * 1543cf8e06f1SMark Fasheh * - No lock caching is employed. We immediately downconvert to 1544cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1545cf8e06f1SMark Fasheh * the blocking list). 1546cf8e06f1SMark Fasheh * 1547cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1548cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1549cf8e06f1SMark Fasheh * request. 1550cf8e06f1SMark Fasheh * 1551cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1552cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1553cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1554cf8e06f1SMark Fasheh */ 1555cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1556cf8e06f1SMark Fasheh { 1557cf8e06f1SMark Fasheh int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; 1558cf8e06f1SMark Fasheh unsigned int lkm_flags = trylock ? 
LKM_NOQUEUE : 0; 1559cf8e06f1SMark Fasheh unsigned long flags; 1560cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1561cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1562cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1563cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1564cf8e06f1SMark Fasheh 1565cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1566cf8e06f1SMark Fasheh 1567cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1568bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1569cf8e06f1SMark Fasheh mlog(ML_ERROR, 1570cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1571cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1572cf8e06f1SMark Fasheh lockres->l_level); 1573cf8e06f1SMark Fasheh return -EINVAL; 1574cf8e06f1SMark Fasheh } 1575cf8e06f1SMark Fasheh 1576cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1577cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1578cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1579cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1580cf8e06f1SMark Fasheh 1581cf8e06f1SMark Fasheh /* 1582cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1583cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 
1584cf8e06f1SMark Fasheh */ 1585cf8e06f1SMark Fasheh ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); 1586cf8e06f1SMark Fasheh if (ret < 0) { 1587cf8e06f1SMark Fasheh mlog_errno(ret); 1588cf8e06f1SMark Fasheh goto out; 1589cf8e06f1SMark Fasheh } 1590cf8e06f1SMark Fasheh 1591cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1592cf8e06f1SMark Fasheh if (ret) { 1593cf8e06f1SMark Fasheh mlog_errno(ret); 1594cf8e06f1SMark Fasheh goto out; 1595cf8e06f1SMark Fasheh } 1596cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1597cf8e06f1SMark Fasheh } 1598cf8e06f1SMark Fasheh 1599cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1600cf8e06f1SMark Fasheh lkm_flags |= LKM_CONVERT; 1601cf8e06f1SMark Fasheh lockres->l_requested = level; 1602cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1603cf8e06f1SMark Fasheh 1604cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1605cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1606cf8e06f1SMark Fasheh 16074670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1608cf8e06f1SMark Fasheh lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 160924ef1815SJoel Becker lockres); 16107431cd7eSJoel Becker if (ret) { 16117431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 161224ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1613cf8e06f1SMark Fasheh ret = -EINVAL; 1614cf8e06f1SMark Fasheh } 1615cf8e06f1SMark Fasheh 1616cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1617cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, &mw); 1618cf8e06f1SMark Fasheh goto out; 1619cf8e06f1SMark Fasheh } 1620cf8e06f1SMark Fasheh 1621cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1622cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1623cf8e06f1SMark Fasheh /* 1624cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1625cf8e06f1SMark Fasheh * flock(). 
Current behavior locally is to allow the 1626cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1627cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1628cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1629cf8e06f1SMark Fasheh * reboot. 1630cf8e06f1SMark Fasheh * 1631cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1632cf8e06f1SMark Fasheh * though. We can't exit this function with an 1633cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1634cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1635cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1636cf8e06f1SMark Fasheh * to just bubble sucess back up to the user. 1637cf8e06f1SMark Fasheh */ 1638cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 16391693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 16401693a5c0SDavid Teigland /* Trylock failed asynchronously */ 16411693a5c0SDavid Teigland BUG_ON(!trylock); 16421693a5c0SDavid Teigland ret = -EAGAIN; 1643cf8e06f1SMark Fasheh } 1644cf8e06f1SMark Fasheh 1645cf8e06f1SMark Fasheh out: 1646cf8e06f1SMark Fasheh 1647cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1648cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1649cf8e06f1SMark Fasheh return ret; 1650cf8e06f1SMark Fasheh } 1651cf8e06f1SMark Fasheh 1652cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1653cf8e06f1SMark Fasheh { 1654cf8e06f1SMark Fasheh int ret; 1655de551246SJoel Becker unsigned int gen; 1656cf8e06f1SMark Fasheh unsigned long flags; 1657cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1658cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1659cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1660cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1661cf8e06f1SMark Fasheh 
1662cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1663cf8e06f1SMark Fasheh 1664cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1665cf8e06f1SMark Fasheh return; 1666cf8e06f1SMark Fasheh 1667cf8e06f1SMark Fasheh if (lockres->l_level == LKM_NLMODE) 1668cf8e06f1SMark Fasheh return; 1669cf8e06f1SMark Fasheh 1670cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1671cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1672cf8e06f1SMark Fasheh lockres->l_action); 1673cf8e06f1SMark Fasheh 1674cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1675cf8e06f1SMark Fasheh /* 1676cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 1677cf8e06f1SMark Fasheh */ 1678cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1679bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 1680cf8e06f1SMark Fasheh 1681de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); 1682cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1683cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1684cf8e06f1SMark Fasheh 1685de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); 1686cf8e06f1SMark Fasheh if (ret) { 1687cf8e06f1SMark Fasheh mlog_errno(ret); 1688cf8e06f1SMark Fasheh return; 1689cf8e06f1SMark Fasheh } 1690cf8e06f1SMark Fasheh 1691cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1692cf8e06f1SMark Fasheh if (ret) 1693cf8e06f1SMark Fasheh mlog_errno(ret); 1694cf8e06f1SMark Fasheh } 1695cf8e06f1SMark Fasheh 169634d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1697ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1698ccd979bdSMark Fasheh { 1699ccd979bdSMark Fasheh int kick = 0; 1700ccd979bdSMark Fasheh 1701ccd979bdSMark Fasheh mlog_entry_void(); 1702ccd979bdSMark Fasheh 1703ccd979bdSMark Fasheh /* If we know that another node is waiting on our 
lock, kick 170434d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 1705ccd979bdSMark Fasheh * condition. */ 1706ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1707ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1708bd3e7610SJoel Becker case DLM_LOCK_EX: 1709ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1710ccd979bdSMark Fasheh kick = 1; 1711ccd979bdSMark Fasheh break; 1712bd3e7610SJoel Becker case DLM_LOCK_PR: 1713ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1714ccd979bdSMark Fasheh kick = 1; 1715ccd979bdSMark Fasheh break; 1716ccd979bdSMark Fasheh default: 1717ccd979bdSMark Fasheh BUG(); 1718ccd979bdSMark Fasheh } 1719ccd979bdSMark Fasheh } 1720ccd979bdSMark Fasheh 1721ccd979bdSMark Fasheh if (kick) 172234d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1723ccd979bdSMark Fasheh 1724ccd979bdSMark Fasheh mlog_exit_void(); 1725ccd979bdSMark Fasheh } 1726ccd979bdSMark Fasheh 1727ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 1728ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 1729ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1730ccd979bdSMark Fasheh 1731ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 1732ccd979bdSMark Fasheh * now. */ 1733ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 1734ccd979bdSMark Fasheh { 1735ccd979bdSMark Fasheh u64 res; 1736ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 1737ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 1738ccd979bdSMark Fasheh 1739ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1740ccd979bdSMark Fasheh 1741ccd979bdSMark Fasheh return res; 1742ccd979bdSMark Fasheh } 1743ccd979bdSMark Fasheh 1744ccd979bdSMark Fasheh /* Call this with the lockres locked. 
I am reasonably sure we don't 1745ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 1746e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. */ 1747ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1748ccd979bdSMark Fasheh { 1749ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1750e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1751ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1752ccd979bdSMark Fasheh 1753ccd979bdSMark Fasheh mlog_entry_void(); 1754ccd979bdSMark Fasheh 17558f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1756ccd979bdSMark Fasheh 175724c19ef4SMark Fasheh /* 175824c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 175924c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 176024c19ef4SMark Fasheh * status. 176124c19ef4SMark Fasheh */ 176224c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 176324c19ef4SMark Fasheh lvb->lvb_version = 0; 176424c19ef4SMark Fasheh goto out; 176524c19ef4SMark Fasheh } 176624c19ef4SMark Fasheh 17674d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 1768ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1769ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1770ccd979bdSMark Fasheh lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1771ccd979bdSMark Fasheh lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1772ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1773ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1774ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 1775ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1776ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 1777ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1778ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 
1779ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1780ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 178115b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 1782f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1783ccd979bdSMark Fasheh 178424c19ef4SMark Fasheh out: 1785ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1786ccd979bdSMark Fasheh 1787ccd979bdSMark Fasheh mlog_exit_void(); 1788ccd979bdSMark Fasheh } 1789ccd979bdSMark Fasheh 1790ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 1791ccd979bdSMark Fasheh u64 packed_time) 1792ccd979bdSMark Fasheh { 1793ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1794ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1795ccd979bdSMark Fasheh } 1796ccd979bdSMark Fasheh 1797ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1798ccd979bdSMark Fasheh { 1799ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1800e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1801ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1802ccd979bdSMark Fasheh 1803ccd979bdSMark Fasheh mlog_entry_void(); 1804ccd979bdSMark Fasheh 1805ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1806ccd979bdSMark Fasheh 18078f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1808ccd979bdSMark Fasheh 1809ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 1810ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1811ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1812ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1813ccd979bdSMark Fasheh 1814ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 181515b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1816ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 1817ca4d147eSHerbert Poetzl 1818ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 1819ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1820ccd979bdSMark Fasheh inode->i_blocks = 0; 1821ccd979bdSMark Fasheh else 18228110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 1823ccd979bdSMark Fasheh 1824ccd979bdSMark Fasheh inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1825ccd979bdSMark Fasheh inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1826ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1827ccd979bdSMark Fasheh inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1828ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 1829ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 1830ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 1831ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 1832ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 1833ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 1834ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1835ccd979bdSMark Fasheh 1836ccd979bdSMark Fasheh mlog_exit_void(); 1837ccd979bdSMark Fasheh } 1838ccd979bdSMark Fasheh 1839f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1840f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 1841ccd979bdSMark Fasheh { 18428f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 18438f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1844ccd979bdSMark Fasheh 1845f9e2d82eSMark Fasheh if (lvb->lvb_version 
== OCFS2_LVB_VERSION 1846f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1847ccd979bdSMark Fasheh return 1; 1848ccd979bdSMark Fasheh return 0; 1849ccd979bdSMark Fasheh } 1850ccd979bdSMark Fasheh 1851ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 1852ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 1853ccd979bdSMark Fasheh * 1854ccd979bdSMark Fasheh * 0 means no refresh needed. 1855ccd979bdSMark Fasheh * 1856ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 1857ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 1858ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 1859ccd979bdSMark Fasheh { 1860ccd979bdSMark Fasheh unsigned long flags; 1861ccd979bdSMark Fasheh int status = 0; 1862ccd979bdSMark Fasheh 1863ccd979bdSMark Fasheh mlog_entry_void(); 1864ccd979bdSMark Fasheh 1865ccd979bdSMark Fasheh refresh_check: 1866ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1867ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 1868ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1869ccd979bdSMark Fasheh goto bail; 1870ccd979bdSMark Fasheh } 1871ccd979bdSMark Fasheh 1872ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 1873ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1874ccd979bdSMark Fasheh 1875ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 1876ccd979bdSMark Fasheh goto refresh_check; 1877ccd979bdSMark Fasheh } 1878ccd979bdSMark Fasheh 1879ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 1880ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 1881ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1882ccd979bdSMark Fasheh 1883ccd979bdSMark Fasheh status = 1; 1884ccd979bdSMark Fasheh bail: 1885ccd979bdSMark Fasheh mlog_exit(status); 1886ccd979bdSMark Fasheh return status; 1887ccd979bdSMark Fasheh } 1888ccd979bdSMark Fasheh 1889ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 1890ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 1891ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 1892ccd979bdSMark Fasheh int status) 1893ccd979bdSMark Fasheh { 1894ccd979bdSMark Fasheh unsigned long flags; 1895ccd979bdSMark Fasheh mlog_entry_void(); 1896ccd979bdSMark Fasheh 1897ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1898ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 1899ccd979bdSMark Fasheh if (!status) 1900ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 1901ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1902ccd979bdSMark Fasheh 1903ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1904ccd979bdSMark Fasheh 1905ccd979bdSMark Fasheh mlog_exit_void(); 1906ccd979bdSMark Fasheh } 1907ccd979bdSMark Fasheh 1908ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 1909e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 1910ccd979bdSMark Fasheh struct buffer_head **bh) 1911ccd979bdSMark Fasheh { 1912ccd979bdSMark Fasheh int status = 0; 1913ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1914e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1915ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1916c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1917ccd979bdSMark Fasheh 1918ccd979bdSMark Fasheh mlog_entry_void(); 1919ccd979bdSMark Fasheh 1920be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 1921be9e986bSMark Fasheh goto bail; 1922be9e986bSMark Fasheh 1923ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1924ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 1925b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 1926ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 1927b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 1928ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1929ccd979bdSMark Fasheh status = -ENOENT; 1930ccd979bdSMark Fasheh goto bail; 1931ccd979bdSMark Fasheh } 1932ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1933ccd979bdSMark Fasheh 1934ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 1935ccd979bdSMark Fasheh goto bail; 1936ccd979bdSMark Fasheh 1937ccd979bdSMark Fasheh /* This will discard any caching information we might have had 1938ccd979bdSMark Fasheh * for the inode metadata. 
*/ 1939ccd979bdSMark Fasheh ocfs2_metadata_cache_purge(inode); 1940ccd979bdSMark Fasheh 194183418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 194283418978SMark Fasheh 1943be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1944b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 1945b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 1946ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 1947ccd979bdSMark Fasheh } else { 1948ccd979bdSMark Fasheh /* Boo, we have to go to disk. */ 1949ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 1950ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, 1951ccd979bdSMark Fasheh bh, OCFS2_BH_CACHED, inode); 1952ccd979bdSMark Fasheh if (status < 0) { 1953ccd979bdSMark Fasheh mlog_errno(status); 1954ccd979bdSMark Fasheh goto bail_refresh; 1955ccd979bdSMark Fasheh } 1956ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 1957ccd979bdSMark Fasheh 1958ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 1959ccd979bdSMark Fasheh * locking an invalid object. 1960ccd979bdSMark Fasheh * 1961ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 1962ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 1963ccd979bdSMark Fasheh * node provides a guarantee that we receive that 1964ccd979bdSMark Fasheh * message and can mark the inode before dropping any 1965ccd979bdSMark Fasheh * locks associated with it. 
*/ 1966ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 1967ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 1968ccd979bdSMark Fasheh status = -EIO; 1969ccd979bdSMark Fasheh goto bail_refresh; 1970ccd979bdSMark Fasheh } 1971ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 1972ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 1973b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 1974ccd979bdSMark Fasheh "inode->i_generation: %u\n", 1975b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 1976b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 1977ccd979bdSMark Fasheh inode->i_generation); 1978ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 1979ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 1980b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 1981b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 1982b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 1983ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 1984ccd979bdSMark Fasheh 1985ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 1986ccd979bdSMark Fasheh } 1987ccd979bdSMark Fasheh 1988ccd979bdSMark Fasheh status = 0; 1989ccd979bdSMark Fasheh bail_refresh: 1990ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 1991ccd979bdSMark Fasheh bail: 1992ccd979bdSMark Fasheh mlog_exit(status); 1993ccd979bdSMark Fasheh return status; 1994ccd979bdSMark Fasheh } 1995ccd979bdSMark Fasheh 1996ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 1997ccd979bdSMark Fasheh struct buffer_head **ret_bh, 1998ccd979bdSMark Fasheh struct buffer_head *passed_bh) 1999ccd979bdSMark Fasheh { 2000ccd979bdSMark Fasheh int status; 2001ccd979bdSMark Fasheh 2002ccd979bdSMark Fasheh if (passed_bh) { 2003ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2004ccd979bdSMark Fasheh * returned bh. 
*/ 2005ccd979bdSMark Fasheh *ret_bh = passed_bh; 2006ccd979bdSMark Fasheh get_bh(*ret_bh); 2007ccd979bdSMark Fasheh 2008ccd979bdSMark Fasheh return 0; 2009ccd979bdSMark Fasheh } 2010ccd979bdSMark Fasheh 2011ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 2012ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno, 2013ccd979bdSMark Fasheh ret_bh, 2014ccd979bdSMark Fasheh OCFS2_BH_CACHED, 2015ccd979bdSMark Fasheh inode); 2016ccd979bdSMark Fasheh if (status < 0) 2017ccd979bdSMark Fasheh mlog_errno(status); 2018ccd979bdSMark Fasheh 2019ccd979bdSMark Fasheh return status; 2020ccd979bdSMark Fasheh } 2021ccd979bdSMark Fasheh 2022ccd979bdSMark Fasheh /* 2023ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2024ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 2025ccd979bdSMark Fasheh */ 2026e63aecb6SMark Fasheh int ocfs2_inode_lock_full(struct inode *inode, 2027ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2028ccd979bdSMark Fasheh int ex, 2029ccd979bdSMark Fasheh int arg_flags) 2030ccd979bdSMark Fasheh { 2031bd3e7610SJoel Becker int status, level, acquired; 2032bd3e7610SJoel Becker u32 dlm_flags; 2033c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2034ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2035ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2036ccd979bdSMark Fasheh 2037ccd979bdSMark Fasheh BUG_ON(!inode); 2038ccd979bdSMark Fasheh 2039ccd979bdSMark Fasheh mlog_entry_void(); 2040ccd979bdSMark Fasheh 2041b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2042b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2043ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2044ccd979bdSMark Fasheh 2045ccd979bdSMark Fasheh status = 0; 2046ccd979bdSMark Fasheh acquired = 0; 2047ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2048ccd979bdSMark Fasheh * rodevices. 
*/ 2049ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2050ccd979bdSMark Fasheh if (ex) 2051ccd979bdSMark Fasheh status = -EROFS; 2052ccd979bdSMark Fasheh goto bail; 2053ccd979bdSMark Fasheh } 2054ccd979bdSMark Fasheh 2055c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2056c271c5c2SSunil Mushran goto local; 2057c271c5c2SSunil Mushran 2058ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2059553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2060ccd979bdSMark Fasheh 2061e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2062bd3e7610SJoel Becker level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2063ccd979bdSMark Fasheh dlm_flags = 0; 2064ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2065bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2066ccd979bdSMark Fasheh 2067ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2068ccd979bdSMark Fasheh if (status < 0) { 2069ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 2070ccd979bdSMark Fasheh mlog_errno(status); 2071ccd979bdSMark Fasheh goto bail; 2072ccd979bdSMark Fasheh } 2073ccd979bdSMark Fasheh 2074ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2075ccd979bdSMark Fasheh acquired = 1; 2076ccd979bdSMark Fasheh 2077ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2078ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2079ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2080ccd979bdSMark Fasheh * abort the operation. */ 2081ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2082553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2083ccd979bdSMark Fasheh 2084c271c5c2SSunil Mushran local: 208524c19ef4SMark Fasheh /* 208624c19ef4SMark Fasheh * We only see this flag if we're being called from 208724c19ef4SMark Fasheh * ocfs2_read_locked_inode(). 
It means we're locking an inode 208824c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 208924c19ef4SMark Fasheh * and let the caller handle it. 209024c19ef4SMark Fasheh */ 209124c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 209224c19ef4SMark Fasheh status = 0; 2093c271c5c2SSunil Mushran if (lockres) 209424c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 209524c19ef4SMark Fasheh goto bail; 209624c19ef4SMark Fasheh } 209724c19ef4SMark Fasheh 2098ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2099e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 2100ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2101ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2102ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2103e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2104ccd979bdSMark Fasheh if (status < 0) { 2105ccd979bdSMark Fasheh if (status != -ENOENT) 2106ccd979bdSMark Fasheh mlog_errno(status); 2107ccd979bdSMark Fasheh goto bail; 2108ccd979bdSMark Fasheh } 2109ccd979bdSMark Fasheh 2110ccd979bdSMark Fasheh if (ret_bh) { 2111ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2112ccd979bdSMark Fasheh if (status < 0) { 2113ccd979bdSMark Fasheh mlog_errno(status); 2114ccd979bdSMark Fasheh goto bail; 2115ccd979bdSMark Fasheh } 2116ccd979bdSMark Fasheh } 2117ccd979bdSMark Fasheh 2118ccd979bdSMark Fasheh bail: 2119ccd979bdSMark Fasheh if (status < 0) { 2120ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2121ccd979bdSMark Fasheh brelse(*ret_bh); 2122ccd979bdSMark Fasheh *ret_bh = NULL; 2123ccd979bdSMark Fasheh } 2124ccd979bdSMark Fasheh if (acquired) 2125e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2126ccd979bdSMark Fasheh } 2127ccd979bdSMark Fasheh 2128ccd979bdSMark Fasheh if (local_bh) 2129ccd979bdSMark Fasheh brelse(local_bh); 
2130ccd979bdSMark Fasheh 2131ccd979bdSMark Fasheh mlog_exit(status); 2132ccd979bdSMark Fasheh return status; 2133ccd979bdSMark Fasheh } 2134ccd979bdSMark Fasheh 2135ccd979bdSMark Fasheh /* 213634d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 213734d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 213834d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 2139ccd979bdSMark Fasheh * 2140ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2141ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2142ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2143ccd979bdSMark Fasheh * 214434d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 214534d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 214634d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 214734d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 214834d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 214934d024f8SMark Fasheh * immediately retry the aop call. 2150ccd979bdSMark Fasheh * 2151ccd979bdSMark Fasheh * We do a blocking lock and immediate unlock before returning, though, so that 2152ccd979bdSMark Fasheh * the lock has a great chance of being cached on this node by the time the VFS 2153ccd979bdSMark Fasheh * calls back to retry the aop. This has a potential to livelock as nodes 2154ccd979bdSMark Fasheh * ping locks back and forth, but that's a risk we're willing to take to avoid 2155ccd979bdSMark Fasheh * the lock inversion simply. 
2156ccd979bdSMark Fasheh */ 2157e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2158ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2159ccd979bdSMark Fasheh int ex, 2160ccd979bdSMark Fasheh struct page *page) 2161ccd979bdSMark Fasheh { 2162ccd979bdSMark Fasheh int ret; 2163ccd979bdSMark Fasheh 2164e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2165ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2166ccd979bdSMark Fasheh unlock_page(page); 2167e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2168e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2169ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2170ccd979bdSMark Fasheh } 2171ccd979bdSMark Fasheh 2172ccd979bdSMark Fasheh return ret; 2173ccd979bdSMark Fasheh } 2174ccd979bdSMark Fasheh 2175e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 21767f1a37e3STiger Yang struct vfsmount *vfsmnt, 21777f1a37e3STiger Yang int *level) 21787f1a37e3STiger Yang { 21797f1a37e3STiger Yang int ret; 21807f1a37e3STiger Yang 21817f1a37e3STiger Yang mlog_entry_void(); 2182e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 21837f1a37e3STiger Yang if (ret < 0) { 21847f1a37e3STiger Yang mlog_errno(ret); 21857f1a37e3STiger Yang return ret; 21867f1a37e3STiger Yang } 21877f1a37e3STiger Yang 21887f1a37e3STiger Yang /* 21897f1a37e3STiger Yang * If we should update atime, we will get EX lock, 21907f1a37e3STiger Yang * otherwise we just get PR lock. 
21917f1a37e3STiger Yang */ 21927f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 21937f1a37e3STiger Yang struct buffer_head *bh = NULL; 21947f1a37e3STiger Yang 2195e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2196e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 21977f1a37e3STiger Yang if (ret < 0) { 21987f1a37e3STiger Yang mlog_errno(ret); 21997f1a37e3STiger Yang return ret; 22007f1a37e3STiger Yang } 22017f1a37e3STiger Yang *level = 1; 22027f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 22037f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 22047f1a37e3STiger Yang if (bh) 22057f1a37e3STiger Yang brelse(bh); 22067f1a37e3STiger Yang } else 22077f1a37e3STiger Yang *level = 0; 22087f1a37e3STiger Yang 22097f1a37e3STiger Yang mlog_exit(ret); 22107f1a37e3STiger Yang return ret; 22117f1a37e3STiger Yang } 22127f1a37e3STiger Yang 2213e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2214ccd979bdSMark Fasheh int ex) 2215ccd979bdSMark Fasheh { 2216bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2217e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2218c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2219ccd979bdSMark Fasheh 2220ccd979bdSMark Fasheh mlog_entry_void(); 2221ccd979bdSMark Fasheh 2222b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2223b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2224ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2225ccd979bdSMark Fasheh 2226c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2227c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2228ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2229ccd979bdSMark Fasheh 2230ccd979bdSMark Fasheh mlog_exit_void(); 2231ccd979bdSMark Fasheh } 2232ccd979bdSMark Fasheh 2233ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2234ccd979bdSMark Fasheh int ex) 2235ccd979bdSMark Fasheh { 2236c271c5c2SSunil Mushran int status = 0; 2237bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2238ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2239ccd979bdSMark Fasheh 2240ccd979bdSMark Fasheh mlog_entry_void(); 2241ccd979bdSMark Fasheh 2242ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2243ccd979bdSMark Fasheh return -EROFS; 2244ccd979bdSMark Fasheh 2245c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2246c271c5c2SSunil Mushran goto bail; 2247c271c5c2SSunil Mushran 2248ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2249ccd979bdSMark Fasheh if (status < 0) { 2250ccd979bdSMark Fasheh mlog_errno(status); 2251ccd979bdSMark Fasheh goto bail; 2252ccd979bdSMark Fasheh } 2253ccd979bdSMark Fasheh 2254ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2255ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2256ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2257ccd979bdSMark Fasheh * everything is up to the caller :) */ 2258ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2259ccd979bdSMark Fasheh if (status < 0) { 2260ccd979bdSMark Fasheh mlog_errno(status); 2261ccd979bdSMark Fasheh goto bail; 2262ccd979bdSMark Fasheh } 2263ccd979bdSMark Fasheh if (status) { 22648e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2265ccd979bdSMark Fasheh 2266ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2267ccd979bdSMark Fasheh 2268ccd979bdSMark Fasheh if (status < 0) 2269ccd979bdSMark Fasheh mlog_errno(status); 2270ccd979bdSMark Fasheh } 2271ccd979bdSMark Fasheh bail: 2272ccd979bdSMark Fasheh mlog_exit(status); 2273ccd979bdSMark Fasheh return status; 2274ccd979bdSMark Fasheh } 2275ccd979bdSMark Fasheh 2276ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2277ccd979bdSMark Fasheh int ex) 2278ccd979bdSMark Fasheh { 2279bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2280ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2281ccd979bdSMark Fasheh 2282c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2283ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2284ccd979bdSMark Fasheh } 2285ccd979bdSMark Fasheh 2286ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2287ccd979bdSMark Fasheh { 2288ccd979bdSMark Fasheh int status; 2289ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2290ccd979bdSMark Fasheh 2291ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2292ccd979bdSMark Fasheh return -EROFS; 2293ccd979bdSMark Fasheh 2294c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2295c271c5c2SSunil Mushran return 0; 2296c271c5c2SSunil Mushran 2297bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2298ccd979bdSMark Fasheh if (status < 0) 2299ccd979bdSMark Fasheh mlog_errno(status); 2300ccd979bdSMark Fasheh 
2301ccd979bdSMark Fasheh return status; 2302ccd979bdSMark Fasheh } 2303ccd979bdSMark Fasheh 2304ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2305ccd979bdSMark Fasheh { 2306ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2307ccd979bdSMark Fasheh 2308c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2309bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2310ccd979bdSMark Fasheh } 2311ccd979bdSMark Fasheh 2312d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2313d680efe9SMark Fasheh { 2314d680efe9SMark Fasheh int ret; 2315bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2316d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2317d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2318d680efe9SMark Fasheh 2319d680efe9SMark Fasheh BUG_ON(!dl); 2320d680efe9SMark Fasheh 2321d680efe9SMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2322d680efe9SMark Fasheh return -EROFS; 2323d680efe9SMark Fasheh 2324c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2325c271c5c2SSunil Mushran return 0; 2326c271c5c2SSunil Mushran 2327d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2328d680efe9SMark Fasheh if (ret < 0) 2329d680efe9SMark Fasheh mlog_errno(ret); 2330d680efe9SMark Fasheh 2331d680efe9SMark Fasheh return ret; 2332d680efe9SMark Fasheh } 2333d680efe9SMark Fasheh 2334d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2335d680efe9SMark Fasheh { 2336bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2337d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2338d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2339d680efe9SMark Fasheh 2340c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2341d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2342d680efe9SMark Fasheh } 2343d680efe9SMark Fasheh 2344ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2345ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2346ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. */ 2347ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2348ccd979bdSMark Fasheh { 2349ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2350ccd979bdSMark Fasheh 2351ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2352ccd979bdSMark Fasheh 2353ccd979bdSMark Fasheh kfree(dlm_debug); 2354ccd979bdSMark Fasheh } 2355ccd979bdSMark Fasheh 2356ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2357ccd979bdSMark Fasheh { 2358ccd979bdSMark Fasheh if (dlm_debug) 2359ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2360ccd979bdSMark Fasheh } 2361ccd979bdSMark Fasheh 2362ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2363ccd979bdSMark Fasheh { 2364ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2365ccd979bdSMark Fasheh } 2366ccd979bdSMark Fasheh 2367ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2368ccd979bdSMark Fasheh { 2369ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2370ccd979bdSMark Fasheh 2371ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2372ccd979bdSMark Fasheh if (!dlm_debug) { 2373ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2374ccd979bdSMark Fasheh goto out; 2375ccd979bdSMark Fasheh } 
2376ccd979bdSMark Fasheh 2377ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2378ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2379ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2380ccd979bdSMark Fasheh out: 2381ccd979bdSMark Fasheh return dlm_debug; 2382ccd979bdSMark Fasheh } 2383ccd979bdSMark Fasheh 2384ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */ 2385ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2386ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2387ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2388ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2389ccd979bdSMark Fasheh }; 2390ccd979bdSMark Fasheh 2391ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2392ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2393ccd979bdSMark Fasheh { 2394ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2395ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2396ccd979bdSMark Fasheh 2397ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2398ccd979bdSMark Fasheh 2399ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2400ccd979bdSMark Fasheh /* discover the head of the list */ 2401ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2402ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2403ccd979bdSMark Fasheh break; 2404ccd979bdSMark Fasheh } 2405ccd979bdSMark Fasheh 2406ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2407ccd979bdSMark Fasheh * l_ops field. 
*/ 2408ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2409ccd979bdSMark Fasheh ret = iter; 2410ccd979bdSMark Fasheh break; 2411ccd979bdSMark Fasheh } 2412ccd979bdSMark Fasheh } 2413ccd979bdSMark Fasheh 2414ccd979bdSMark Fasheh return ret; 2415ccd979bdSMark Fasheh } 2416ccd979bdSMark Fasheh 2417ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2418ccd979bdSMark Fasheh { 2419ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2420ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2421ccd979bdSMark Fasheh 2422ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2423ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2424ccd979bdSMark Fasheh if (iter) { 2425ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2426ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2427ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2428ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2429ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2430ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2431ccd979bdSMark Fasheh * in them. 
*/ 2432ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2433ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2434ccd979bdSMark Fasheh } 2435ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2436ccd979bdSMark Fasheh 2437ccd979bdSMark Fasheh return iter; 2438ccd979bdSMark Fasheh } 2439ccd979bdSMark Fasheh 2440ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2441ccd979bdSMark Fasheh { 2442ccd979bdSMark Fasheh } 2443ccd979bdSMark Fasheh 2444ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2445ccd979bdSMark Fasheh { 2446ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2447ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2448ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2449ccd979bdSMark Fasheh 2450ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2451ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2452ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2453ccd979bdSMark Fasheh if (iter) { 2454ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2455ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2456ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2457ccd979bdSMark Fasheh } 2458ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2459ccd979bdSMark Fasheh 2460ccd979bdSMark Fasheh return iter; 2461ccd979bdSMark Fasheh } 2462ccd979bdSMark Fasheh 2463ccd979bdSMark Fasheh /* So that debugfs.ocfs2 can determine which format is being used */ 2464ccd979bdSMark Fasheh #define OCFS2_DLM_DEBUG_STR_VERSION 1 2465ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2466ccd979bdSMark Fasheh { 2467ccd979bdSMark Fasheh int i; 2468ccd979bdSMark Fasheh char *lvb; 2469ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2470ccd979bdSMark Fasheh 2471ccd979bdSMark Fasheh if (!lockres) 2472ccd979bdSMark Fasheh return -EINVAL; 2473ccd979bdSMark Fasheh 2474d680efe9SMark Fasheh 
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2475d680efe9SMark Fasheh 2476d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2477d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2478d680efe9SMark Fasheh lockres->l_name, 2479d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2480d680efe9SMark Fasheh else 2481d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2482d680efe9SMark Fasheh 2483d680efe9SMark Fasheh seq_printf(m, "%d\t" 2484ccd979bdSMark Fasheh "0x%lx\t" 2485ccd979bdSMark Fasheh "0x%x\t" 2486ccd979bdSMark Fasheh "0x%x\t" 2487ccd979bdSMark Fasheh "%u\t" 2488ccd979bdSMark Fasheh "%u\t" 2489ccd979bdSMark Fasheh "%d\t" 2490ccd979bdSMark Fasheh "%d\t", 2491ccd979bdSMark Fasheh lockres->l_level, 2492ccd979bdSMark Fasheh lockres->l_flags, 2493ccd979bdSMark Fasheh lockres->l_action, 2494ccd979bdSMark Fasheh lockres->l_unlock_action, 2495ccd979bdSMark Fasheh lockres->l_ro_holders, 2496ccd979bdSMark Fasheh lockres->l_ex_holders, 2497ccd979bdSMark Fasheh lockres->l_requested, 2498ccd979bdSMark Fasheh lockres->l_blocking); 2499ccd979bdSMark Fasheh 2500ccd979bdSMark Fasheh /* Dump the raw LVB */ 25018f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2502ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2503ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2504ccd979bdSMark Fasheh 2505ccd979bdSMark Fasheh /* End the line */ 2506ccd979bdSMark Fasheh seq_printf(m, "\n"); 2507ccd979bdSMark Fasheh return 0; 2508ccd979bdSMark Fasheh } 2509ccd979bdSMark Fasheh 251090d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2511ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2512ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2513ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2514ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2515ccd979bdSMark Fasheh }; 2516ccd979bdSMark Fasheh 2517ccd979bdSMark Fasheh static int 
ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2518ccd979bdSMark Fasheh { 2519ccd979bdSMark Fasheh struct seq_file *seq = (struct seq_file *) file->private_data; 2520ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2521ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2522ccd979bdSMark Fasheh 2523ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2524ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2525ccd979bdSMark Fasheh return seq_release_private(inode, file); 2526ccd979bdSMark Fasheh } 2527ccd979bdSMark Fasheh 2528ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2529ccd979bdSMark Fasheh { 2530ccd979bdSMark Fasheh int ret; 2531ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2532ccd979bdSMark Fasheh struct seq_file *seq; 2533ccd979bdSMark Fasheh struct ocfs2_super *osb; 2534ccd979bdSMark Fasheh 2535ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2536ccd979bdSMark Fasheh if (!priv) { 2537ccd979bdSMark Fasheh ret = -ENOMEM; 2538ccd979bdSMark Fasheh mlog_errno(ret); 2539ccd979bdSMark Fasheh goto out; 2540ccd979bdSMark Fasheh } 25418e18e294STheodore Ts'o osb = inode->i_private; 2542ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2543ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2544ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2545ccd979bdSMark Fasheh 2546ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2547ccd979bdSMark Fasheh if (ret) { 2548ccd979bdSMark Fasheh kfree(priv); 2549ccd979bdSMark Fasheh mlog_errno(ret); 2550ccd979bdSMark Fasheh goto out; 2551ccd979bdSMark Fasheh } 2552ccd979bdSMark Fasheh 2553ccd979bdSMark Fasheh seq = (struct seq_file *) file->private_data; 2554ccd979bdSMark Fasheh seq->private = priv; 2555ccd979bdSMark Fasheh 2556ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2557ccd979bdSMark Fasheh 
priv->p_dlm_debug); 2558ccd979bdSMark Fasheh 2559ccd979bdSMark Fasheh out: 2560ccd979bdSMark Fasheh return ret; 2561ccd979bdSMark Fasheh } 2562ccd979bdSMark Fasheh 25634b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2564ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2565ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2566ccd979bdSMark Fasheh .read = seq_read, 2567ccd979bdSMark Fasheh .llseek = seq_lseek, 2568ccd979bdSMark Fasheh }; 2569ccd979bdSMark Fasheh 2570ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2571ccd979bdSMark Fasheh { 2572ccd979bdSMark Fasheh int ret = 0; 2573ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2574ccd979bdSMark Fasheh 2575ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2576ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2577ccd979bdSMark Fasheh osb->osb_debug_root, 2578ccd979bdSMark Fasheh osb, 2579ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2580ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2581ccd979bdSMark Fasheh ret = -EINVAL; 2582ccd979bdSMark Fasheh mlog(ML_ERROR, 2583ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2584ccd979bdSMark Fasheh goto out; 2585ccd979bdSMark Fasheh } 2586ccd979bdSMark Fasheh 2587ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2588ccd979bdSMark Fasheh out: 2589ccd979bdSMark Fasheh return ret; 2590ccd979bdSMark Fasheh } 2591ccd979bdSMark Fasheh 2592ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2593ccd979bdSMark Fasheh { 2594ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2595ccd979bdSMark Fasheh 2596ccd979bdSMark Fasheh if (dlm_debug) { 2597ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2598ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2599ccd979bdSMark Fasheh } 2600ccd979bdSMark Fasheh } 2601ccd979bdSMark Fasheh 2602ccd979bdSMark Fasheh int 
ocfs2_dlm_init(struct ocfs2_super *osb) 2603ccd979bdSMark Fasheh { 2604c271c5c2SSunil Mushran int status = 0; 26054670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2606ccd979bdSMark Fasheh 2607ccd979bdSMark Fasheh mlog_entry_void(); 2608ccd979bdSMark Fasheh 26090abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 26100abd6d18SMark Fasheh osb->node_num = 0; 2611c271c5c2SSunil Mushran goto local; 26120abd6d18SMark Fasheh } 2613c271c5c2SSunil Mushran 2614ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2615ccd979bdSMark Fasheh if (status < 0) { 2616ccd979bdSMark Fasheh mlog_errno(status); 2617ccd979bdSMark Fasheh goto bail; 2618ccd979bdSMark Fasheh } 2619ccd979bdSMark Fasheh 262034d024f8SMark Fasheh /* launch downconvert thread */ 262134d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 262234d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 262334d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 262434d024f8SMark Fasheh osb->dc_task = NULL; 2625ccd979bdSMark Fasheh mlog_errno(status); 2626ccd979bdSMark Fasheh goto bail; 2627ccd979bdSMark Fasheh } 2628ccd979bdSMark Fasheh 2629ccd979bdSMark Fasheh /* for now, uuid == domain */ 26304670c46dSJoel Becker status = ocfs2_cluster_connect(osb->uuid_str, 26314670c46dSJoel Becker strlen(osb->uuid_str), 26324670c46dSJoel Becker ocfs2_do_node_down, osb, 26334670c46dSJoel Becker &conn); 26344670c46dSJoel Becker if (status) { 2635ccd979bdSMark Fasheh mlog_errno(status); 2636ccd979bdSMark Fasheh goto bail; 2637ccd979bdSMark Fasheh } 2638ccd979bdSMark Fasheh 26390abd6d18SMark Fasheh status = ocfs2_cluster_this_node(&osb->node_num); 26400abd6d18SMark Fasheh if (status < 0) { 26410abd6d18SMark Fasheh mlog_errno(status); 26420abd6d18SMark Fasheh mlog(ML_ERROR, 26430abd6d18SMark Fasheh "could not find this host's node number\n"); 2644286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 26450abd6d18SMark Fasheh goto bail; 26460abd6d18SMark Fasheh } 26470abd6d18SMark Fasheh 
2648c271c5c2SSunil Mushran local: 2649ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2650ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2651ccd979bdSMark Fasheh 26524670c46dSJoel Becker osb->cconn = conn; 2653ccd979bdSMark Fasheh 2654ccd979bdSMark Fasheh status = 0; 2655ccd979bdSMark Fasheh bail: 2656ccd979bdSMark Fasheh if (status < 0) { 2657ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 265834d024f8SMark Fasheh if (osb->dc_task) 265934d024f8SMark Fasheh kthread_stop(osb->dc_task); 2660ccd979bdSMark Fasheh } 2661ccd979bdSMark Fasheh 2662ccd979bdSMark Fasheh mlog_exit(status); 2663ccd979bdSMark Fasheh return status; 2664ccd979bdSMark Fasheh } 2665ccd979bdSMark Fasheh 2666286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 2667286eaa95SJoel Becker int hangup_pending) 2668ccd979bdSMark Fasheh { 2669ccd979bdSMark Fasheh mlog_entry_void(); 2670ccd979bdSMark Fasheh 2671ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 2672ccd979bdSMark Fasheh 26734670c46dSJoel Becker /* 26744670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 26754670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 26764670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
26774670c46dSJoel Becker */ 26784670c46dSJoel Becker 267934d024f8SMark Fasheh if (osb->dc_task) { 268034d024f8SMark Fasheh kthread_stop(osb->dc_task); 268134d024f8SMark Fasheh osb->dc_task = NULL; 2682ccd979bdSMark Fasheh } 2683ccd979bdSMark Fasheh 2684ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 2685ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 2686ccd979bdSMark Fasheh 2687286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 26884670c46dSJoel Becker osb->cconn = NULL; 2689ccd979bdSMark Fasheh 2690ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 2691ccd979bdSMark Fasheh 2692ccd979bdSMark Fasheh mlog_exit_void(); 2693ccd979bdSMark Fasheh } 2694ccd979bdSMark Fasheh 26957431cd7eSJoel Becker static void ocfs2_unlock_ast(void *opaque, int error) 2696ccd979bdSMark Fasheh { 2697ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 2698ccd979bdSMark Fasheh unsigned long flags; 2699ccd979bdSMark Fasheh 2700ccd979bdSMark Fasheh mlog_entry_void(); 2701ccd979bdSMark Fasheh 2702ccd979bdSMark Fasheh mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2703ccd979bdSMark Fasheh lockres->l_unlock_action); 2704ccd979bdSMark Fasheh 2705ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2706de551246SJoel Becker if (error) { 27077431cd7eSJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 27087431cd7eSJoel Becker "unlock_action %d\n", error, lockres->l_name, 2709ccd979bdSMark Fasheh lockres->l_unlock_action); 2710ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2711ccd979bdSMark Fasheh return; 2712ccd979bdSMark Fasheh } 2713ccd979bdSMark Fasheh 2714ccd979bdSMark Fasheh switch(lockres->l_unlock_action) { 2715ccd979bdSMark Fasheh case OCFS2_UNLOCK_CANCEL_CONVERT: 2716ccd979bdSMark Fasheh mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2717ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 2718ccd979bdSMark Fasheh break; 
2719ccd979bdSMark Fasheh case OCFS2_UNLOCK_DROP_LOCK: 2720bd3e7610SJoel Becker lockres->l_level = DLM_LOCK_IV; 2721ccd979bdSMark Fasheh break; 2722ccd979bdSMark Fasheh default: 2723ccd979bdSMark Fasheh BUG(); 2724ccd979bdSMark Fasheh } 2725ccd979bdSMark Fasheh 2726ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2727ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2728ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2729ccd979bdSMark Fasheh 2730ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2731ccd979bdSMark Fasheh 2732ccd979bdSMark Fasheh mlog_exit_void(); 2733ccd979bdSMark Fasheh } 2734ccd979bdSMark Fasheh 2735ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 27360d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 2737ccd979bdSMark Fasheh { 27387431cd7eSJoel Becker int ret; 2739ccd979bdSMark Fasheh unsigned long flags; 2740bd3e7610SJoel Becker u32 lkm_flags = 0; 2741ccd979bdSMark Fasheh 2742ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 2743ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2744ccd979bdSMark Fasheh goto out; 2745ccd979bdSMark Fasheh 2746b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2747bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 2748b80fc012SMark Fasheh 2749ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2750ccd979bdSMark Fasheh 2751ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2752ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 2753ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2754ccd979bdSMark Fasheh 2755ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 2756ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 2757ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 2758ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 2759ccd979bdSMark Fasheh lockres->l_unlock_action); 2760ccd979bdSMark Fasheh 2761ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2762ccd979bdSMark Fasheh 2763ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 2764ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 2765ccd979bdSMark Fasheh * future? 
*/ 2766ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2767ccd979bdSMark Fasheh 2768ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2769ccd979bdSMark Fasheh } 2770ccd979bdSMark Fasheh 27710d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 27720d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 2773bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 27740d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 27750d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 27760d5dc6c2SMark Fasheh } 2777ccd979bdSMark Fasheh 2778ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 2779ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2780ccd979bdSMark Fasheh lockres->l_name); 2781ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2782ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 2783ccd979bdSMark Fasheh 2784ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 2785ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2786ccd979bdSMark Fasheh goto out; 2787ccd979bdSMark Fasheh } 2788ccd979bdSMark Fasheh 2789ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 2790ccd979bdSMark Fasheh 2791ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 2792ccd979bdSMark Fasheh * fire. */ 2793ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 2794ccd979bdSMark Fasheh 2795ccd979bdSMark Fasheh /* is this necessary? 
*/ 2796ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2797ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 2798ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2799ccd979bdSMark Fasheh 2800ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2801ccd979bdSMark Fasheh 28024670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 280324ef1815SJoel Becker lockres); 28047431cd7eSJoel Becker if (ret) { 28057431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 2806ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2807cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 2808ccd979bdSMark Fasheh BUG(); 2809ccd979bdSMark Fasheh } 281024ef1815SJoel Becker mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", 2811ccd979bdSMark Fasheh lockres->l_name); 2812ccd979bdSMark Fasheh 2813ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2814ccd979bdSMark Fasheh out: 2815ccd979bdSMark Fasheh mlog_exit(0); 2816ccd979bdSMark Fasheh return 0; 2817ccd979bdSMark Fasheh } 2818ccd979bdSMark Fasheh 2819ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 2820ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 282134d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 2822ccd979bdSMark Fasheh * it safe to drop. 2823ccd979bdSMark Fasheh * 2824ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 2825ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 2826ccd979bdSMark Fasheh { 2827ccd979bdSMark Fasheh int status; 2828ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 2829ccd979bdSMark Fasheh unsigned long flags; 2830ccd979bdSMark Fasheh 2831ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 2832ccd979bdSMark Fasheh 2833ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2834ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 2835ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 2836ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 2837ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2838ccd979bdSMark Fasheh 2839ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 2840ccd979bdSMark Fasheh 2841ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 2842ccd979bdSMark Fasheh if (status) 2843ccd979bdSMark Fasheh mlog_errno(status); 2844ccd979bdSMark Fasheh 2845ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2846ccd979bdSMark Fasheh } 2847ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2848ccd979bdSMark Fasheh } 2849ccd979bdSMark Fasheh 2850d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 2851d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 2852d680efe9SMark Fasheh { 2853d680efe9SMark Fasheh int ret; 2854d680efe9SMark Fasheh 2855d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 28560d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 2857d680efe9SMark Fasheh if (ret) 2858d680efe9SMark Fasheh mlog_errno(ret); 2859d680efe9SMark Fasheh } 2860d680efe9SMark Fasheh 2861ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2862ccd979bdSMark Fasheh { 2863d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 2864d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
2865ccd979bdSMark Fasheh } 2866ccd979bdSMark Fasheh 2867ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 2868ccd979bdSMark Fasheh { 2869ccd979bdSMark Fasheh int status, err; 2870ccd979bdSMark Fasheh 2871ccd979bdSMark Fasheh mlog_entry_void(); 2872ccd979bdSMark Fasheh 2873ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 2874ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 2875ccd979bdSMark Fasheh 2876ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 287750008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 2878ccd979bdSMark Fasheh if (err < 0) 2879ccd979bdSMark Fasheh mlog_errno(err); 2880ccd979bdSMark Fasheh 2881ccd979bdSMark Fasheh status = err; 2882ccd979bdSMark Fasheh 2883ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2884e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 2885ccd979bdSMark Fasheh if (err < 0) 2886ccd979bdSMark Fasheh mlog_errno(err); 2887ccd979bdSMark Fasheh if (err < 0 && !status) 2888ccd979bdSMark Fasheh status = err; 2889ccd979bdSMark Fasheh 2890ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 28910d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 2892ccd979bdSMark Fasheh if (err < 0) 2893ccd979bdSMark Fasheh mlog_errno(err); 2894ccd979bdSMark Fasheh if (err < 0 && !status) 2895ccd979bdSMark Fasheh status = err; 2896ccd979bdSMark Fasheh 2897ccd979bdSMark Fasheh mlog_exit(status); 2898ccd979bdSMark Fasheh return status; 2899ccd979bdSMark Fasheh } 2900ccd979bdSMark Fasheh 2901de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 2902ccd979bdSMark Fasheh int new_level) 2903ccd979bdSMark Fasheh { 2904ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 2905ccd979bdSMark Fasheh 2906bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 2907ccd979bdSMark Fasheh 2908ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 2909bd3e7610SJoel Becker mlog(ML_ERROR, 
"lockres->l_level (%d) <= new_level (%d)\n", 2910ccd979bdSMark Fasheh lockres->l_level, new_level); 2911ccd979bdSMark Fasheh BUG(); 2912ccd979bdSMark Fasheh } 2913ccd979bdSMark Fasheh 2914ccd979bdSMark Fasheh mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 2915ccd979bdSMark Fasheh lockres->l_name, new_level, lockres->l_blocking); 2916ccd979bdSMark Fasheh 2917ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 2918ccd979bdSMark Fasheh lockres->l_requested = new_level; 2919ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2920de551246SJoel Becker return lockres_set_pending(lockres); 2921ccd979bdSMark Fasheh } 2922ccd979bdSMark Fasheh 2923ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 2924ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 2925ccd979bdSMark Fasheh int new_level, 2926de551246SJoel Becker int lvb, 2927de551246SJoel Becker unsigned int generation) 2928ccd979bdSMark Fasheh { 2929bd3e7610SJoel Becker int ret; 2930bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 2931ccd979bdSMark Fasheh 2932ccd979bdSMark Fasheh mlog_entry_void(); 2933ccd979bdSMark Fasheh 2934ccd979bdSMark Fasheh if (lvb) 2935bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 2936ccd979bdSMark Fasheh 29374670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 2938ccd979bdSMark Fasheh new_level, 2939ccd979bdSMark Fasheh &lockres->l_lksb, 2940ccd979bdSMark Fasheh dlm_flags, 2941ccd979bdSMark Fasheh lockres->l_name, 2942f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 294324ef1815SJoel Becker lockres); 2944de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 29457431cd7eSJoel Becker if (ret) { 29467431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 2947ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 2948ccd979bdSMark Fasheh goto bail; 2949ccd979bdSMark Fasheh } 2950ccd979bdSMark Fasheh 2951ccd979bdSMark Fasheh ret = 0; 2952ccd979bdSMark Fasheh bail: 2953ccd979bdSMark Fasheh 
mlog_exit(ret); 2954ccd979bdSMark Fasheh return ret; 2955ccd979bdSMark Fasheh } 2956ccd979bdSMark Fasheh 295724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 2958ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 2959ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2960ccd979bdSMark Fasheh { 2961ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 2962ccd979bdSMark Fasheh 2963ccd979bdSMark Fasheh mlog_entry_void(); 2964ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2965ccd979bdSMark Fasheh 2966ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 2967ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 2968ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 2969ccd979bdSMark Fasheh * requeue this lock. */ 2970ccd979bdSMark Fasheh 2971ccd979bdSMark Fasheh mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 2972ccd979bdSMark Fasheh return 0; 2973ccd979bdSMark Fasheh } 2974ccd979bdSMark Fasheh 2975ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 2976ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 2977ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 2978ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 2979ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. 
*/ 2980ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 2981ccd979bdSMark Fasheh 2982ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 2983ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 2984ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2985ccd979bdSMark Fasheh 2986ccd979bdSMark Fasheh return 1; 2987ccd979bdSMark Fasheh } 2988ccd979bdSMark Fasheh 2989ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 2990ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2991ccd979bdSMark Fasheh { 2992ccd979bdSMark Fasheh int ret; 2993ccd979bdSMark Fasheh 2994ccd979bdSMark Fasheh mlog_entry_void(); 2995ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2996ccd979bdSMark Fasheh 29974670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 29987431cd7eSJoel Becker DLM_LKF_CANCEL, lockres); 29997431cd7eSJoel Becker if (ret) { 30007431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3001ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3002ccd979bdSMark Fasheh } 3003ccd979bdSMark Fasheh 300424ef1815SJoel Becker mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3005ccd979bdSMark Fasheh 3006ccd979bdSMark Fasheh mlog_exit(ret); 3007ccd979bdSMark Fasheh return ret; 3008ccd979bdSMark Fasheh } 3009ccd979bdSMark Fasheh 3010b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3011ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3012cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3013ccd979bdSMark Fasheh { 3014ccd979bdSMark Fasheh unsigned long flags; 3015ccd979bdSMark Fasheh int blocking; 3016ccd979bdSMark Fasheh int new_level; 3017ccd979bdSMark Fasheh int ret = 0; 30185ef0d4eaSMark Fasheh int set_lvb = 0; 3019de551246SJoel Becker unsigned int gen; 3020ccd979bdSMark Fasheh 3021ccd979bdSMark Fasheh mlog_entry_void(); 3022ccd979bdSMark Fasheh 3023ccd979bdSMark Fasheh 
spin_lock_irqsave(&lockres->l_lock, flags); 3024ccd979bdSMark Fasheh 3025ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3026ccd979bdSMark Fasheh 3027ccd979bdSMark Fasheh recheck: 3028ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3029de551246SJoel Becker /* XXX 3030de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3031de551246SJoel Becker * exists entirely for one reason - another thread has set 3032de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3033de551246SJoel Becker * 3034de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3035de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3036de551246SJoel Becker * get no ast, and we will have no way of knowing the 3037de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3038de551246SJoel Becker * into dlm_lock() and wait...forever. 3039de551246SJoel Becker * 3040de551246SJoel Becker * Why forever? Because another node has asked for the 3041de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3042de551246SJoel Becker * 3043de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3044de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3045de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3046de551246SJoel Becker * we then cancel their request. 3047de551246SJoel Becker * 3048de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3049de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3050de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 
3051de551246SJoel Becker */ 3052de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_PENDING) 3053de551246SJoel Becker goto leave_requeue; 3054de551246SJoel Becker 3055d680efe9SMark Fasheh ctl->requeue = 1; 3056ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3057ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3058ccd979bdSMark Fasheh if (ret) { 3059ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3060ccd979bdSMark Fasheh if (ret < 0) 3061ccd979bdSMark Fasheh mlog_errno(ret); 3062ccd979bdSMark Fasheh } 3063ccd979bdSMark Fasheh goto leave; 3064ccd979bdSMark Fasheh } 3065ccd979bdSMark Fasheh 3066ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3067ccd979bdSMark Fasheh * then requeue. */ 3068bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 3069f7fbfdd1SMark Fasheh && (lockres->l_ex_holders || lockres->l_ro_holders)) 3070f7fbfdd1SMark Fasheh goto leave_requeue; 3071ccd979bdSMark Fasheh 3072ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3073ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3074bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 3075f7fbfdd1SMark Fasheh lockres->l_ex_holders) 3076f7fbfdd1SMark Fasheh goto leave_requeue; 3077f7fbfdd1SMark Fasheh 3078f7fbfdd1SMark Fasheh /* 3079f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3080f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3081f7fbfdd1SMark Fasheh */ 3082f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3083f7fbfdd1SMark Fasheh && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3084f7fbfdd1SMark Fasheh goto leave_requeue; 3085ccd979bdSMark Fasheh 308616d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 308716d5b956SMark Fasheh 308816d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 308916d5b956SMark Fasheh && !lockres->l_ops->check_downconvert(lockres, new_level)) 309016d5b956SMark Fasheh goto leave_requeue; 309116d5b956SMark Fasheh 3092ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3093ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3094ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3095cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3096ccd979bdSMark Fasheh goto downconvert; 3097ccd979bdSMark Fasheh 3098ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3099ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3100ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3101ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. */ 3102ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3103ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3104ccd979bdSMark Fasheh 3105cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3106d680efe9SMark Fasheh 3107d680efe9SMark Fasheh if (ctl->unblock_action == UNBLOCK_STOP_POST) 3108d680efe9SMark Fasheh goto leave; 3109ccd979bdSMark Fasheh 3110ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3111ccd979bdSMark Fasheh if (blocking != lockres->l_blocking) { 3112ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3113ccd979bdSMark Fasheh * it just yet. 
*/ 3114ccd979bdSMark Fasheh goto recheck; 3115ccd979bdSMark Fasheh } 3116ccd979bdSMark Fasheh 3117ccd979bdSMark Fasheh downconvert: 3118d680efe9SMark Fasheh ctl->requeue = 0; 3119ccd979bdSMark Fasheh 31205ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3121bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 31225ef0d4eaSMark Fasheh set_lvb = 1; 31235ef0d4eaSMark Fasheh 31245ef0d4eaSMark Fasheh /* 31255ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 31265ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 31275ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 31285ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 31295ef0d4eaSMark Fasheh */ 31305ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 31315ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 31325ef0d4eaSMark Fasheh } 31335ef0d4eaSMark Fasheh 3134de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3135ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3136de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3137de551246SJoel Becker gen); 3138de551246SJoel Becker 3139ccd979bdSMark Fasheh leave: 3140ccd979bdSMark Fasheh mlog_exit(ret); 3141ccd979bdSMark Fasheh return ret; 3142f7fbfdd1SMark Fasheh 3143f7fbfdd1SMark Fasheh leave_requeue: 3144f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3145f7fbfdd1SMark Fasheh ctl->requeue = 1; 3146f7fbfdd1SMark Fasheh 3147f7fbfdd1SMark Fasheh mlog_exit(0); 3148f7fbfdd1SMark Fasheh return 0; 3149ccd979bdSMark Fasheh } 3150ccd979bdSMark Fasheh 3151d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3152ccd979bdSMark Fasheh int blocking) 3153ccd979bdSMark Fasheh { 3154ccd979bdSMark Fasheh struct inode *inode; 3155ccd979bdSMark Fasheh struct address_space *mapping; 3156ccd979bdSMark Fasheh 3157ccd979bdSMark 
Fasheh inode = ocfs2_lock_res_inode(lockres); 3158ccd979bdSMark Fasheh mapping = inode->i_mapping; 3159ccd979bdSMark Fasheh 31601044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3161f1f54068SMark Fasheh goto out; 3162f1f54068SMark Fasheh 31637f4a2a97SMark Fasheh /* 31647f4a2a97SMark Fasheh * We need this before the filemap_fdatawrite() so that it can 31657f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 31667f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 31677f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 31687f4a2a97SMark Fasheh * them up again. 31697f4a2a97SMark Fasheh */ 31707f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 31717f4a2a97SMark Fasheh 3172ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3173b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3174b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3175ccd979bdSMark Fasheh } 3176ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3177bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3178ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3179ccd979bdSMark Fasheh } else { 3180ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3181ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3182ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3183ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3184ccd979bdSMark Fasheh * them around in that case. 
*/ 3185ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3186ccd979bdSMark Fasheh } 3187ccd979bdSMark Fasheh 3188f1f54068SMark Fasheh out: 3189d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3190ccd979bdSMark Fasheh } 3191ccd979bdSMark Fasheh 3192810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3193810d5aebSMark Fasheh int new_level) 3194810d5aebSMark Fasheh { 3195810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3196810d5aebSMark Fasheh int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3197810d5aebSMark Fasheh 3198bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3199bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3200810d5aebSMark Fasheh 3201810d5aebSMark Fasheh if (checkpointed) 3202810d5aebSMark Fasheh return 1; 3203810d5aebSMark Fasheh 3204810d5aebSMark Fasheh ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3205810d5aebSMark Fasheh return 0; 3206810d5aebSMark Fasheh } 3207810d5aebSMark Fasheh 3208810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3209810d5aebSMark Fasheh { 3210810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3211810d5aebSMark Fasheh 3212810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3213810d5aebSMark Fasheh } 3214810d5aebSMark Fasheh 3215d680efe9SMark Fasheh /* 3216d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. Right now this 321734d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3218d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 
3219d680efe9SMark Fasheh */ 3220d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3221d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3222d680efe9SMark Fasheh { 3223d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3224d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3225d680efe9SMark Fasheh } 3226d680efe9SMark Fasheh 3227d680efe9SMark Fasheh /* 3228d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3229d680efe9SMark Fasheh * 3230d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3231d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3232d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3233d680efe9SMark Fasheh * 3234d680efe9SMark Fasheh * We have two potential problems 3235d680efe9SMark Fasheh * 3236d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3237d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3238d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3239d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3240d680efe9SMark Fasheh * unblock processing. 3241d680efe9SMark Fasheh * 3242d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3243d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3244d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3245d680efe9SMark Fasheh */ 3246d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3247d680efe9SMark Fasheh int blocking) 3248d680efe9SMark Fasheh { 3249d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3250d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3251d680efe9SMark Fasheh struct dentry *dentry; 3252d680efe9SMark Fasheh unsigned long flags; 3253d680efe9SMark Fasheh int extra_ref = 0; 3254d680efe9SMark Fasheh 3255d680efe9SMark Fasheh /* 3256d680efe9SMark Fasheh * This node is blocking another node from getting a read 3257d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3258d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3259d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3260d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3261d680efe9SMark Fasheh * so there's no further work to do. 3262d680efe9SMark Fasheh */ 3263bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3264d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3265d680efe9SMark Fasheh 3266d680efe9SMark Fasheh /* 3267d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3268d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3269d680efe9SMark Fasheh * needs to be freed or not. 3270d680efe9SMark Fasheh */ 3271d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3272d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3273d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3274d680efe9SMark Fasheh 3275d680efe9SMark Fasheh /* 3276d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3277d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3278d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3279d680efe9SMark Fasheh * flag. 
3280d680efe9SMark Fasheh */ 3281d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3282d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3283d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3284d680efe9SMark Fasheh && dl->dl_count) { 3285d680efe9SMark Fasheh dl->dl_count++; 3286d680efe9SMark Fasheh extra_ref = 1; 3287d680efe9SMark Fasheh } 3288d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3289d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3290d680efe9SMark Fasheh 3291d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3292d680efe9SMark Fasheh 3293d680efe9SMark Fasheh /* 3294d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3295d680efe9SMark Fasheh * which means we can't have any more outstanding 3296d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3297d680efe9SMark Fasheh */ 3298d680efe9SMark Fasheh if (!extra_ref) 3299d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3300d680efe9SMark Fasheh 3301d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3302d680efe9SMark Fasheh while (1) { 3303d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3304d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3305d680efe9SMark Fasheh if (!dentry) 3306d680efe9SMark Fasheh break; 3307d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3308d680efe9SMark Fasheh 3309d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3310d680efe9SMark Fasheh dentry->d_name.name); 3311d680efe9SMark Fasheh 3312d680efe9SMark Fasheh /* 3313d680efe9SMark Fasheh * The following dcache calls may do an 3314d680efe9SMark Fasheh * iput(). Normally we don't want that from the 3315d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3316d680efe9SMark Fasheh * because the requesting node already has an 3317d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3318d680efe9SMark Fasheh * for a downconvert. 
3319d680efe9SMark Fasheh */ 3320d680efe9SMark Fasheh d_delete(dentry); 3321d680efe9SMark Fasheh dput(dentry); 3322d680efe9SMark Fasheh 3323d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3324d680efe9SMark Fasheh } 3325d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3326d680efe9SMark Fasheh 3327d680efe9SMark Fasheh /* 3328d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3329d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3330d680efe9SMark Fasheh */ 3331d680efe9SMark Fasheh if (dl->dl_count == 1) 3332d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3333d680efe9SMark Fasheh 3334d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3335d680efe9SMark Fasheh } 3336d680efe9SMark Fasheh 33374670c46dSJoel Becker /* 33384670c46dSJoel Becker * This is the filesystem locking protocol. It provides the lock handling 33394670c46dSJoel Becker * hooks for the underlying DLM. It has a maximum version number. 33404670c46dSJoel Becker * The version number allows interoperability with systems running at 33414670c46dSJoel Becker * the same major number and an equal or smaller minor number. 33424670c46dSJoel Becker * 33434670c46dSJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 33444670c46dSJoel Becker * lock, orders them differently, does different things underneath a lock), 33454670c46dSJoel Becker * the version must be changed. The protocol is negotiated when joining 33464670c46dSJoel Becker * the dlm domain. A node may join the domain if its major version is 33474670c46dSJoel Becker * identical to all other nodes and its minor version is greater than 33484670c46dSJoel Becker * or equal to all other nodes. When its minor version is greater than 33494670c46dSJoel Becker * the other nodes, it will run at the minor version specified by the 33504670c46dSJoel Becker * other nodes. 
33514670c46dSJoel Becker * 33524670c46dSJoel Becker * If a locking change is made that will not be compatible with older 33534670c46dSJoel Becker * versions, the major number must be increased and the minor version set 33544670c46dSJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 33554670c46dSJoel Becker * speaking to older versions, the minor version must be increased. If a 33564670c46dSJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 33574670c46dSJoel Becker * are just ignored by older versions), the version does not need to be 33584670c46dSJoel Becker * updated. 33594670c46dSJoel Becker */ 336024ef1815SJoel Becker static struct ocfs2_locking_protocol lproto = { 33614670c46dSJoel Becker .lp_max_version = { 33624670c46dSJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 33634670c46dSJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 33644670c46dSJoel Becker }, 336524ef1815SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 336624ef1815SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 336724ef1815SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 336824ef1815SJoel Becker }; 336924ef1815SJoel Becker 337063e0c48aSJoel Becker void ocfs2_set_locking_protocol(void) 337124ef1815SJoel Becker { 337263e0c48aSJoel Becker ocfs2_stack_glue_set_locking_protocol(&lproto); 337324ef1815SJoel Becker } 337424ef1815SJoel Becker 337524ef1815SJoel Becker 337600600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3377ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3378ccd979bdSMark Fasheh { 3379ccd979bdSMark Fasheh int status; 3380d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3381ccd979bdSMark Fasheh unsigned long flags; 3382ccd979bdSMark Fasheh 3383ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3384ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3385ccd979bdSMark Fasheh * flag. 
*/ 3386ccd979bdSMark Fasheh 3387ccd979bdSMark Fasheh mlog_entry_void(); 3388ccd979bdSMark Fasheh 3389ccd979bdSMark Fasheh BUG_ON(!lockres); 3390ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3391ccd979bdSMark Fasheh 3392ccd979bdSMark Fasheh mlog(0, "lockres %s blocked.\n", lockres->l_name); 3393ccd979bdSMark Fasheh 3394ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 339534d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 3396ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3397ccd979bdSMark Fasheh * but short circuiting here will still save us some 3398ccd979bdSMark Fasheh * performance. */ 3399ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3400ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3401ccd979bdSMark Fasheh goto unqueue; 3402ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3403ccd979bdSMark Fasheh 3404b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3405ccd979bdSMark Fasheh if (status < 0) 3406ccd979bdSMark Fasheh mlog_errno(status); 3407ccd979bdSMark Fasheh 3408ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3409ccd979bdSMark Fasheh unqueue: 3410d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3411ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3412ccd979bdSMark Fasheh } else 3413ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3414ccd979bdSMark Fasheh 3415ccd979bdSMark Fasheh mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3416d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3417ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3418ccd979bdSMark Fasheh 3419d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3420d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3421d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3422d680efe9SMark Fasheh 3423ccd979bdSMark Fasheh mlog_exit_void(); 3424ccd979bdSMark Fasheh } 3425ccd979bdSMark Fasheh 3426ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3427ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3428ccd979bdSMark Fasheh { 3429ccd979bdSMark Fasheh mlog_entry_void(); 3430ccd979bdSMark Fasheh 3431ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3432ccd979bdSMark Fasheh 3433ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3434ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 3435ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 3436ccd979bdSMark Fasheh * to the resource will get it soon. 
*/ 3437ccd979bdSMark Fasheh mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 3438ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3439ccd979bdSMark Fasheh return; 3440ccd979bdSMark Fasheh } 3441ccd979bdSMark Fasheh 3442ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3443ccd979bdSMark Fasheh 344434d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 3445ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 3446ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 3447ccd979bdSMark Fasheh &osb->blocked_lock_list); 3448ccd979bdSMark Fasheh osb->blocked_lock_count++; 3449ccd979bdSMark Fasheh } 345034d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 3451ccd979bdSMark Fasheh 3452ccd979bdSMark Fasheh mlog_exit_void(); 3453ccd979bdSMark Fasheh } 345434d024f8SMark Fasheh 345534d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 345634d024f8SMark Fasheh { 345734d024f8SMark Fasheh unsigned long processed; 345834d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 345934d024f8SMark Fasheh 346034d024f8SMark Fasheh mlog_entry_void(); 346134d024f8SMark Fasheh 346234d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 346334d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 346434d024f8SMark Fasheh * wake happens part-way through our work */ 346534d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 346634d024f8SMark Fasheh 346734d024f8SMark Fasheh processed = osb->blocked_lock_count; 346834d024f8SMark Fasheh while (processed) { 346934d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 347034d024f8SMark Fasheh 347134d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 347234d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 347334d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 347434d024f8SMark Fasheh osb->blocked_lock_count--; 347534d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 347634d024f8SMark 
Fasheh 347734d024f8SMark Fasheh BUG_ON(!processed); 347834d024f8SMark Fasheh processed--; 347934d024f8SMark Fasheh 348034d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 348134d024f8SMark Fasheh 348234d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 348334d024f8SMark Fasheh } 348434d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 348534d024f8SMark Fasheh 348634d024f8SMark Fasheh mlog_exit_void(); 348734d024f8SMark Fasheh } 348834d024f8SMark Fasheh 348934d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 349034d024f8SMark Fasheh { 349134d024f8SMark Fasheh int empty = 0; 349234d024f8SMark Fasheh 349334d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 349434d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 349534d024f8SMark Fasheh empty = 1; 349634d024f8SMark Fasheh 349734d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 349834d024f8SMark Fasheh return empty; 349934d024f8SMark Fasheh } 350034d024f8SMark Fasheh 350134d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 350234d024f8SMark Fasheh { 350334d024f8SMark Fasheh int should_wake = 0; 350434d024f8SMark Fasheh 350534d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 350634d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 350734d024f8SMark Fasheh should_wake = 1; 350834d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 350934d024f8SMark Fasheh 351034d024f8SMark Fasheh return should_wake; 351134d024f8SMark Fasheh } 351234d024f8SMark Fasheh 3513200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 351434d024f8SMark Fasheh { 351534d024f8SMark Fasheh int status = 0; 351634d024f8SMark Fasheh struct ocfs2_super *osb = arg; 351734d024f8SMark Fasheh 351834d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 351934d024f8SMark Fasheh * work available */ 352034d024f8SMark Fasheh while (!(kthread_should_stop() && 352134d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(osb))) { 352234d024f8SMark Fasheh 352334d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 352434d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 352534d024f8SMark Fasheh kthread_should_stop()); 352634d024f8SMark Fasheh 352734d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 352834d024f8SMark Fasheh 352934d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 353034d024f8SMark Fasheh } 353134d024f8SMark Fasheh 353234d024f8SMark Fasheh osb->dc_task = NULL; 353334d024f8SMark Fasheh return status; 353434d024f8SMark Fasheh } 353534d024f8SMark Fasheh 353634d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 353734d024f8SMark Fasheh { 353834d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 353934d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 354034d024f8SMark Fasheh * the caller may have made to the voting state */ 354134d024f8SMark Fasheh osb->dc_wake_sequence++; 354234d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 354334d024f8SMark Fasheh wake_up(&osb->dc_event); 354434d024f8SMark Fasheh } 3545