1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * dlmglue.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Code which implements an OCFS2 specific interface to our DLM. 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2003, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 35ccd979bdSMark Fasheh 36ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 37ccd979bdSMark Fasheh #include <cluster/masklog.h> 38ccd979bdSMark Fasheh 39ccd979bdSMark Fasheh #include "ocfs2.h" 40d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 41ccd979bdSMark Fasheh 42ccd979bdSMark Fasheh #include "alloc.h" 43d680efe9SMark Fasheh #include "dcache.h" 44ccd979bdSMark Fasheh #include "dlmglue.h" 45ccd979bdSMark Fasheh #include "extent_map.h" 467f1a37e3STiger Yang #include "file.h" 47ccd979bdSMark Fasheh #include "heartbeat.h" 48ccd979bdSMark Fasheh #include "inode.h" 49ccd979bdSMark Fasheh #include "journal.h" 5024ef1815SJoel Becker #include "stackglue.h" 51ccd979bdSMark Fasheh #include "slot_map.h" 52ccd979bdSMark Fasheh #include "super.h" 53ccd979bdSMark Fasheh #include "uptodate.h" 54ccd979bdSMark Fasheh 55ccd979bdSMark Fasheh #include "buffer_head_io.h" 56ccd979bdSMark Fasheh 57ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 58ccd979bdSMark Fasheh struct list_head mw_item; 59ccd979bdSMark Fasheh int mw_status; 60ccd979bdSMark Fasheh struct completion mw_complete; 61ccd979bdSMark Fasheh unsigned long mw_mask; 62ccd979bdSMark Fasheh unsigned long mw_goal; 638ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 648ddb7b00SSunil Mushran unsigned long long mw_lock_start; 658ddb7b00SSunil Mushran #endif 66ccd979bdSMark Fasheh }; 67ccd979bdSMark Fasheh 6854a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 6954a7e755SMark Fasheh static 
struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 70cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 71ccd979bdSMark Fasheh 72d680efe9SMark Fasheh /* 73cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 74d680efe9SMark Fasheh * 75b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 76d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 77d680efe9SMark Fasheh * 78d680efe9SMark Fasheh */ 79d680efe9SMark Fasheh enum ocfs2_unblock_action { 80d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 81d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 82d680efe9SMark Fasheh * ->post_unlock callback */ 83d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 84d680efe9SMark Fasheh * ->post_unlock() callback. */ 85d680efe9SMark Fasheh }; 86d680efe9SMark Fasheh 87d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 88d680efe9SMark Fasheh int requeue; 89d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 90d680efe9SMark Fasheh }; 91d680efe9SMark Fasheh 92810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 93810d5aebSMark Fasheh int new_level); 94810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 95810d5aebSMark Fasheh 96cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 97cc567d89SMark Fasheh int blocking); 98cc567d89SMark Fasheh 99cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 100cc567d89SMark Fasheh int blocking); 101d680efe9SMark Fasheh 102d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 103d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 104ccd979bdSMark Fasheh 1056cb129f5SAdrian Bunk 1066cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, 
__PRETTY_FUNCTION__, __LINE__, __lockres) 1076cb129f5SAdrian Bunk 1086cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */ 1096cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1106cb129f5SAdrian Bunk const char *function, 1116cb129f5SAdrian Bunk unsigned int line, 1126cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1136cb129f5SAdrian Bunk { 1148f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 1158f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1166cb129f5SAdrian Bunk 1176cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1186cb129f5SAdrian Bunk lockres->l_name, function, line); 1196cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1206cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1216cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1226cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1236cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1246cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1256cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1266cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1276cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1286cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1296cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1306cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1316cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1326cb129f5SAdrian Bunk } 1336cb129f5SAdrian Bunk 1346cb129f5SAdrian Bunk 135f625c979SMark Fasheh /* 136f625c979SMark Fasheh * OCFS2 Lock Resource Operations 137f625c979SMark Fasheh * 138f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 
1390d5dc6c2SMark Fasheh * 1400d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1410d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 1420d5dc6c2SMark Fasheh * 1430d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1440d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1450d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1460d5dc6c2SMark Fasheh * destruction time). 147f625c979SMark Fasheh */ 148ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 14954a7e755SMark Fasheh /* 15054a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 15154a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 15254a7e755SMark Fasheh */ 15354a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 154b5e500e2SMark Fasheh 1550d5dc6c2SMark Fasheh /* 15634d024f8SMark Fasheh * Optionally called in the downconvert thread after a 15734d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 15834d024f8SMark Fasheh * after this callback is called, so it is safe to free 15934d024f8SMark Fasheh * memory, etc. 1600d5dc6c2SMark Fasheh * 1610d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1620d5dc6c2SMark Fasheh * by ->downconvert_worker() 1630d5dc6c2SMark Fasheh */ 164d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 165f625c979SMark Fasheh 166f625c979SMark Fasheh /* 16716d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 16816d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 16916d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 
17016d5b956SMark Fasheh * 17116d5b956SMark Fasheh * For most locks, the default checks that there are no 17216d5b956SMark Fasheh * incompatible holders are sufficient. 17316d5b956SMark Fasheh * 17416d5b956SMark Fasheh * Called with the lockres spinlock held. 17516d5b956SMark Fasheh */ 17616d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 17716d5b956SMark Fasheh 17816d5b956SMark Fasheh /* 1795ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1805ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1815ef0d4eaSMark Fasheh * 1825ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1835ef0d4eaSMark Fasheh * in the flags field. 1845ef0d4eaSMark Fasheh * 1855ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1865ef0d4eaSMark Fasheh */ 1875ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 1885ef0d4eaSMark Fasheh 1895ef0d4eaSMark Fasheh /* 190cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 191cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 192cc567d89SMark Fasheh * any locks held so the function can do work that might 193cc567d89SMark Fasheh * schedule (syncing out data, etc). 194cc567d89SMark Fasheh * 195cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 196cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 197cc567d89SMark Fasheh */ 198cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 199cc567d89SMark Fasheh 200cc567d89SMark Fasheh /* 201f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 202f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 
203f625c979SMark Fasheh */ 204f625c979SMark Fasheh int flags; 205ccd979bdSMark Fasheh }; 206ccd979bdSMark Fasheh 207f625c979SMark Fasheh /* 208f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 209f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. If this 210f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 211f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 212f625c979SMark Fasheh * expected that the locking wrapper will clear the 213f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 214f625c979SMark Fasheh */ 215f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 216f625c979SMark Fasheh 217b80fc012SMark Fasheh /* 2185ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2195ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 220b80fc012SMark Fasheh */ 221b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 222b80fc012SMark Fasheh 223ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 22454a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 225f625c979SMark Fasheh .flags = 0, 226ccd979bdSMark Fasheh }; 227ccd979bdSMark Fasheh 228e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 22954a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 230810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 231810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 232f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 233b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 234ccd979bdSMark Fasheh }; 235ccd979bdSMark Fasheh 236ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 237f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops 
ocfs2_rename_lops = { 241f625c979SMark Fasheh .flags = 0, 242ccd979bdSMark Fasheh }; 243ccd979bdSMark Fasheh 244d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 24554a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 246d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 247cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 248f625c979SMark Fasheh .flags = 0, 249d680efe9SMark Fasheh }; 250d680efe9SMark Fasheh 25150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 25250008630STiger Yang .get_osb = ocfs2_get_inode_osb, 25350008630STiger Yang .flags = 0, 25450008630STiger Yang }; 25550008630STiger Yang 256cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 257cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 258cf8e06f1SMark Fasheh .flags = 0, 259cf8e06f1SMark Fasheh }; 260cf8e06f1SMark Fasheh 261ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 262ccd979bdSMark Fasheh { 263ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 26450008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 26550008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 266ccd979bdSMark Fasheh } 267ccd979bdSMark Fasheh 268ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 269ccd979bdSMark Fasheh { 270ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 271ccd979bdSMark Fasheh 272ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 273ccd979bdSMark Fasheh } 274ccd979bdSMark Fasheh 275d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 276d680efe9SMark Fasheh { 277d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 278d680efe9SMark Fasheh 279d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 280d680efe9SMark Fasheh } 281d680efe9SMark Fasheh 28254a7e755SMark 
Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 28354a7e755SMark Fasheh { 28454a7e755SMark Fasheh if (lockres->l_ops->get_osb) 28554a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 28654a7e755SMark Fasheh 28754a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 28854a7e755SMark Fasheh } 28954a7e755SMark Fasheh 290ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 291ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 292ccd979bdSMark Fasheh int level, 293bd3e7610SJoel Becker u32 dlm_flags); 294ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 295ccd979bdSMark Fasheh int wanted); 296ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 297ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 298ccd979bdSMark Fasheh int level); 299ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 300ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 301ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 302ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 303ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 304ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 305ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 306ccd979bdSMark Fasheh int convert); 3077431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 3087431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3097431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 310ccd979bdSMark Fasheh } while (0) 31134d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 31234d024f8SMark 
Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 313ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 314e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 315ccd979bdSMark Fasheh struct buffer_head **bh); 316ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 317ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 318de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 319cf8e06f1SMark Fasheh int new_level); 320cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 321cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 322cf8e06f1SMark Fasheh int new_level, 323de551246SJoel Becker int lvb, 324de551246SJoel Becker unsigned int generation); 325cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 326cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 327cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 328cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 329cf8e06f1SMark Fasheh 330ccd979bdSMark Fasheh 331ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 332ccd979bdSMark Fasheh u64 blkno, 333ccd979bdSMark Fasheh u32 generation, 334ccd979bdSMark Fasheh char *name) 335ccd979bdSMark Fasheh { 336ccd979bdSMark Fasheh int len; 337ccd979bdSMark Fasheh 338ccd979bdSMark Fasheh mlog_entry_void(); 339ccd979bdSMark Fasheh 340ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 341ccd979bdSMark Fasheh 342b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 343b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 344b0697053SMark Fasheh (long long)blkno, generation); 345ccd979bdSMark Fasheh 346ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 347ccd979bdSMark Fasheh 348ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", 
name); 349ccd979bdSMark Fasheh 350ccd979bdSMark Fasheh mlog_exit_void(); 351ccd979bdSMark Fasheh } 352ccd979bdSMark Fasheh 35334af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 354ccd979bdSMark Fasheh 355ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 356ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 357ccd979bdSMark Fasheh { 358ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 359ccd979bdSMark Fasheh 360ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 361ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 362ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 363ccd979bdSMark Fasheh } 364ccd979bdSMark Fasheh 365ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 366ccd979bdSMark Fasheh { 367ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 368ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 369ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 370ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 371ccd979bdSMark Fasheh } 372ccd979bdSMark Fasheh 3738ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 3748ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 3758ddb7b00SSunil Mushran { 3768ddb7b00SSunil Mushran res->l_lock_num_prmode = 0; 3778ddb7b00SSunil Mushran res->l_lock_num_prmode_failed = 0; 3788ddb7b00SSunil Mushran res->l_lock_total_prmode = 0; 3798ddb7b00SSunil Mushran res->l_lock_max_prmode = 0; 3808ddb7b00SSunil Mushran res->l_lock_num_exmode = 0; 3818ddb7b00SSunil Mushran res->l_lock_num_exmode_failed = 0; 3828ddb7b00SSunil Mushran res->l_lock_total_exmode = 0; 3838ddb7b00SSunil Mushran res->l_lock_max_exmode = 0; 3848ddb7b00SSunil Mushran res->l_lock_refresh = 0; 3858ddb7b00SSunil Mushran } 3868ddb7b00SSunil Mushran 3878ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 
3888ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 3898ddb7b00SSunil Mushran { 3908ddb7b00SSunil Mushran unsigned long long *num, *sum; 3918ddb7b00SSunil Mushran unsigned int *max, *failed; 3928ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 3938ddb7b00SSunil Mushran unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; 3948ddb7b00SSunil Mushran 3958ddb7b00SSunil Mushran if (level == LKM_PRMODE) { 3968ddb7b00SSunil Mushran num = &res->l_lock_num_prmode; 3978ddb7b00SSunil Mushran sum = &res->l_lock_total_prmode; 3988ddb7b00SSunil Mushran max = &res->l_lock_max_prmode; 3998ddb7b00SSunil Mushran failed = &res->l_lock_num_prmode_failed; 4008ddb7b00SSunil Mushran } else if (level == LKM_EXMODE) { 4018ddb7b00SSunil Mushran num = &res->l_lock_num_exmode; 4028ddb7b00SSunil Mushran sum = &res->l_lock_total_exmode; 4038ddb7b00SSunil Mushran max = &res->l_lock_max_exmode; 4048ddb7b00SSunil Mushran failed = &res->l_lock_num_exmode_failed; 4058ddb7b00SSunil Mushran } else 4068ddb7b00SSunil Mushran return; 4078ddb7b00SSunil Mushran 4088ddb7b00SSunil Mushran (*num)++; 4098ddb7b00SSunil Mushran (*sum) += time; 4108ddb7b00SSunil Mushran if (time > *max) 4118ddb7b00SSunil Mushran *max = time; 4128ddb7b00SSunil Mushran if (ret) 4138ddb7b00SSunil Mushran (*failed)++; 4148ddb7b00SSunil Mushran } 4158ddb7b00SSunil Mushran 4168ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4178ddb7b00SSunil Mushran { 4188ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4198ddb7b00SSunil Mushran } 4208ddb7b00SSunil Mushran 4218ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4228ddb7b00SSunil Mushran { 4238ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 4248ddb7b00SSunil Mushran mw->mw_lock_start = timespec_to_ns(&ts); 4258ddb7b00SSunil Mushran } 4268ddb7b00SSunil Mushran #else 4278ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct 
ocfs2_lock_res *res) 4288ddb7b00SSunil Mushran { 4298ddb7b00SSunil Mushran } 4308ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4318ddb7b00SSunil Mushran int level, struct ocfs2_mask_waiter *mw, int ret) 4328ddb7b00SSunil Mushran { 4338ddb7b00SSunil Mushran } 4348ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4358ddb7b00SSunil Mushran { 4368ddb7b00SSunil Mushran } 4378ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4388ddb7b00SSunil Mushran { 4398ddb7b00SSunil Mushran } 4408ddb7b00SSunil Mushran #endif 4418ddb7b00SSunil Mushran 442ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 443ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 444ccd979bdSMark Fasheh enum ocfs2_lock_type type, 445ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 446ccd979bdSMark Fasheh void *priv) 447ccd979bdSMark Fasheh { 448ccd979bdSMark Fasheh res->l_type = type; 449ccd979bdSMark Fasheh res->l_ops = ops; 450ccd979bdSMark Fasheh res->l_priv = priv; 451ccd979bdSMark Fasheh 452bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 453bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 454bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 455ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 456ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 457ccd979bdSMark Fasheh 458ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 459ccd979bdSMark Fasheh 460ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 4618ddb7b00SSunil Mushran 4628ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 463ccd979bdSMark Fasheh } 464ccd979bdSMark Fasheh 465ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 466ccd979bdSMark Fasheh { 467ccd979bdSMark Fasheh /* This also clears out the lock status block */ 468ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 
469ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 470ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 471ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 472ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 473ccd979bdSMark Fasheh } 474ccd979bdSMark Fasheh 475ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 476ccd979bdSMark Fasheh enum ocfs2_lock_type type, 47724c19ef4SMark Fasheh unsigned int generation, 478ccd979bdSMark Fasheh struct inode *inode) 479ccd979bdSMark Fasheh { 480ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 481ccd979bdSMark Fasheh 482ccd979bdSMark Fasheh switch(type) { 483ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 484ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 485ccd979bdSMark Fasheh break; 486ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 487e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 488ccd979bdSMark Fasheh break; 48950008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 49050008630STiger Yang ops = &ocfs2_inode_open_lops; 49150008630STiger Yang break; 492ccd979bdSMark Fasheh default: 493ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 494ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 495ccd979bdSMark Fasheh break; 496ccd979bdSMark Fasheh }; 497ccd979bdSMark Fasheh 498d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 49924c19ef4SMark Fasheh generation, res->l_name); 500d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 501d680efe9SMark Fasheh } 502d680efe9SMark Fasheh 50354a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 50454a7e755SMark Fasheh { 50554a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 50654a7e755SMark Fasheh 50754a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 50854a7e755SMark Fasheh } 50954a7e755SMark Fasheh 510cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res 
*lockres) 511cf8e06f1SMark Fasheh { 512cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 513cf8e06f1SMark Fasheh 514cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 515cf8e06f1SMark Fasheh } 516cf8e06f1SMark Fasheh 517d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 518d680efe9SMark Fasheh { 519d680efe9SMark Fasheh __be64 inode_blkno_be; 520d680efe9SMark Fasheh 521d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 522d680efe9SMark Fasheh sizeof(__be64)); 523d680efe9SMark Fasheh 524d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 525d680efe9SMark Fasheh } 526d680efe9SMark Fasheh 52754a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 52854a7e755SMark Fasheh { 52954a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 53054a7e755SMark Fasheh 53154a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 53254a7e755SMark Fasheh } 53354a7e755SMark Fasheh 534d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 535d680efe9SMark Fasheh u64 parent, struct inode *inode) 536d680efe9SMark Fasheh { 537d680efe9SMark Fasheh int len; 538d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 539d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 540d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 541d680efe9SMark Fasheh 542d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 543d680efe9SMark Fasheh 544d680efe9SMark Fasheh /* 545d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 546d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 547d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. 
We still 548d680efe9SMark Fasheh * want error prints to show something without garbling the 549d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 550d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 551d680efe9SMark Fasheh * binary lock names. The stringified names have been a 552d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 553d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 554d680efe9SMark Fasheh * 555d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 556d680efe9SMark Fasheh * name size stays the same though - the last part is all 557d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 558d680efe9SMark Fasheh */ 559d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 560d680efe9SMark Fasheh "%c%016llx", 561d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 562d680efe9SMark Fasheh (long long)parent); 563d680efe9SMark Fasheh 564d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 565d680efe9SMark Fasheh 566d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 567d680efe9SMark Fasheh sizeof(__be64)); 568d680efe9SMark Fasheh 569d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 570d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 571d680efe9SMark Fasheh dl); 572ccd979bdSMark Fasheh } 573ccd979bdSMark Fasheh 574ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 575ccd979bdSMark Fasheh struct ocfs2_super *osb) 576ccd979bdSMark Fasheh { 577ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 578ccd979bdSMark Fasheh * once on it manually. 
*/ 579ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 580d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 581d680efe9SMark Fasheh 0, res->l_name); 582ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 583ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 584ccd979bdSMark Fasheh } 585ccd979bdSMark Fasheh 586ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 587ccd979bdSMark Fasheh struct ocfs2_super *osb) 588ccd979bdSMark Fasheh { 589ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 590ccd979bdSMark Fasheh * once on it manually. */ 591ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 592d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 593d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 594ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 595ccd979bdSMark Fasheh } 596ccd979bdSMark Fasheh 597cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 598cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 599cf8e06f1SMark Fasheh { 600cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 601cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 602cf8e06f1SMark Fasheh 603cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 604cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 605cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 606cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 607cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 608cf8e06f1SMark Fasheh fp); 609cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 610cf8e06f1SMark Fasheh } 611cf8e06f1SMark Fasheh 612ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 613ccd979bdSMark Fasheh { 614ccd979bdSMark Fasheh mlog_entry_void(); 
615ccd979bdSMark Fasheh 616ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 617ccd979bdSMark Fasheh return; 618ccd979bdSMark Fasheh 619ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 620ccd979bdSMark Fasheh 621ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 622ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 623ccd979bdSMark Fasheh res->l_name); 624ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 625ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 626ccd979bdSMark Fasheh res->l_name); 627ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 628ccd979bdSMark Fasheh "Lockres %s is locked\n", 629ccd979bdSMark Fasheh res->l_name); 630ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 631ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 632ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 633ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 634ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 635ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 636ccd979bdSMark Fasheh 637ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 638ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 639ccd979bdSMark Fasheh 640ccd979bdSMark Fasheh res->l_flags = 0UL; 641ccd979bdSMark Fasheh mlog_exit_void(); 642ccd979bdSMark Fasheh } 643ccd979bdSMark Fasheh 644ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 645ccd979bdSMark Fasheh int level) 646ccd979bdSMark Fasheh { 647ccd979bdSMark Fasheh mlog_entry_void(); 648ccd979bdSMark Fasheh 649ccd979bdSMark Fasheh BUG_ON(!lockres); 650ccd979bdSMark Fasheh 651ccd979bdSMark Fasheh switch(level) { 652bd3e7610SJoel Becker case DLM_LOCK_EX: 653ccd979bdSMark Fasheh lockres->l_ex_holders++; 654ccd979bdSMark Fasheh break; 655bd3e7610SJoel Becker case DLM_LOCK_PR: 656ccd979bdSMark Fasheh lockres->l_ro_holders++; 657ccd979bdSMark Fasheh 
break; 658ccd979bdSMark Fasheh default: 659ccd979bdSMark Fasheh BUG(); 660ccd979bdSMark Fasheh } 661ccd979bdSMark Fasheh 662ccd979bdSMark Fasheh mlog_exit_void(); 663ccd979bdSMark Fasheh } 664ccd979bdSMark Fasheh 665ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 666ccd979bdSMark Fasheh int level) 667ccd979bdSMark Fasheh { 668ccd979bdSMark Fasheh mlog_entry_void(); 669ccd979bdSMark Fasheh 670ccd979bdSMark Fasheh BUG_ON(!lockres); 671ccd979bdSMark Fasheh 672ccd979bdSMark Fasheh switch(level) { 673bd3e7610SJoel Becker case DLM_LOCK_EX: 674ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 675ccd979bdSMark Fasheh lockres->l_ex_holders--; 676ccd979bdSMark Fasheh break; 677bd3e7610SJoel Becker case DLM_LOCK_PR: 678ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 679ccd979bdSMark Fasheh lockres->l_ro_holders--; 680ccd979bdSMark Fasheh break; 681ccd979bdSMark Fasheh default: 682ccd979bdSMark Fasheh BUG(); 683ccd979bdSMark Fasheh } 684ccd979bdSMark Fasheh mlog_exit_void(); 685ccd979bdSMark Fasheh } 686ccd979bdSMark Fasheh 687ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 688ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 689ccd979bdSMark Fasheh * lock types are added. 
*/ 690ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 691ccd979bdSMark Fasheh { 692bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 693ccd979bdSMark Fasheh 694bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 695bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 696bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 697bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 698ccd979bdSMark Fasheh return new_level; 699ccd979bdSMark Fasheh } 700ccd979bdSMark Fasheh 701ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 702ccd979bdSMark Fasheh unsigned long newflags) 703ccd979bdSMark Fasheh { 704800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 705ccd979bdSMark Fasheh 706ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 707ccd979bdSMark Fasheh 708ccd979bdSMark Fasheh lockres->l_flags = newflags; 709ccd979bdSMark Fasheh 710800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 711ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 712ccd979bdSMark Fasheh continue; 713ccd979bdSMark Fasheh 714ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 715ccd979bdSMark Fasheh mw->mw_status = 0; 716ccd979bdSMark Fasheh complete(&mw->mw_complete); 717ccd979bdSMark Fasheh } 718ccd979bdSMark Fasheh } 719ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 720ccd979bdSMark Fasheh { 721ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 722ccd979bdSMark Fasheh } 723ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 724ccd979bdSMark Fasheh unsigned long clear) 725ccd979bdSMark Fasheh { 726ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 727ccd979bdSMark Fasheh } 728ccd979bdSMark Fasheh 729ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 730ccd979bdSMark Fasheh { 
731ccd979bdSMark Fasheh mlog_entry_void(); 732ccd979bdSMark Fasheh 733ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 734ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 735ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 736bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 737ccd979bdSMark Fasheh 738ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 739ccd979bdSMark Fasheh if (lockres->l_level <= 740ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 741bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 742ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 743ccd979bdSMark Fasheh } 744ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 745ccd979bdSMark Fasheh 746ccd979bdSMark Fasheh mlog_exit_void(); 747ccd979bdSMark Fasheh } 748ccd979bdSMark Fasheh 749ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 750ccd979bdSMark Fasheh { 751ccd979bdSMark Fasheh mlog_entry_void(); 752ccd979bdSMark Fasheh 753ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 754ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 755ccd979bdSMark Fasheh 756ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 757ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 758ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 759ccd979bdSMark Fasheh * update */ 760bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 761f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 762ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 763ccd979bdSMark Fasheh 764ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 765ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 766ccd979bdSMark Fasheh 767ccd979bdSMark Fasheh mlog_exit_void(); 768ccd979bdSMark Fasheh } 769ccd979bdSMark Fasheh 770ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 771ccd979bdSMark Fasheh { 772ccd979bdSMark Fasheh mlog_entry_void(); 773ccd979bdSMark Fasheh 7743cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 775ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 776ccd979bdSMark Fasheh 777bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 778f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 779f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 780ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 781ccd979bdSMark Fasheh 782ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 783ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 784ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 785ccd979bdSMark Fasheh 786ccd979bdSMark Fasheh mlog_exit_void(); 787ccd979bdSMark Fasheh } 788ccd979bdSMark Fasheh 789ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 790ccd979bdSMark Fasheh int level) 791ccd979bdSMark Fasheh { 792ccd979bdSMark Fasheh int needs_downconvert = 0; 793ccd979bdSMark Fasheh mlog_entry_void(); 794ccd979bdSMark Fasheh 795ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 796ccd979bdSMark Fasheh 
797ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 798ccd979bdSMark Fasheh 799ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 800ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 801ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 802ccd979bdSMark Fasheh * blocking. this also catches the case where we get 803ccd979bdSMark Fasheh * duplicate BASTs */ 804ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 805ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 806ccd979bdSMark Fasheh needs_downconvert = 1; 807ccd979bdSMark Fasheh 808ccd979bdSMark Fasheh lockres->l_blocking = level; 809ccd979bdSMark Fasheh } 810ccd979bdSMark Fasheh 811ccd979bdSMark Fasheh mlog_exit(needs_downconvert); 812ccd979bdSMark Fasheh return needs_downconvert; 813ccd979bdSMark Fasheh } 814ccd979bdSMark Fasheh 815de551246SJoel Becker /* 816de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 817de551246SJoel Becker * 818de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 819de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 820de551246SJoel Becker * for more details on the race. 821de551246SJoel Becker * 822de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 823de551246SJoel Becker * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 824de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 825de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 826de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 827de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 828de551246SJoel Becker * nothing. 
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action?  The other path has re-set PENDING.  Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()          ocfs2_downconvert_thread()
 *     clear PENDING                ocfs2_unblock_lock()
 *                                   take l_lock
 *                                   !BUSY
 *                                   ocfs2_prepare_downconvert()
 *                                    set BUSY
 *                                    set PENDING
 *                                   drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *                     <window>
 *                                   ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert().  That wasn't nice.
 *
 * To solve this we introduce l_pending_gen.  A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres.  lockres_set_pending() will return the
 * current generation number.
When ocfs2_cluster_lock() goes to clear 867de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 868de551246SJoel Becker * example above, the generation numbers will *not* match. Thus, 869de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 870de551246SJoel Becker * ocfs2_prepare_downconvert(). 871de551246SJoel Becker */ 872de551246SJoel Becker 873de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 874de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 875de551246SJoel Becker unsigned int generation, 876de551246SJoel Becker struct ocfs2_super *osb) 877de551246SJoel Becker { 878de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 879de551246SJoel Becker 880de551246SJoel Becker /* 881de551246SJoel Becker * The ast and locking functions can race us here. The winner 882de551246SJoel Becker * will clear pending, the loser will not. 883de551246SJoel Becker */ 884de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 885de551246SJoel Becker (lockres->l_pending_gen != generation)) 886de551246SJoel Becker return; 887de551246SJoel Becker 888de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 889de551246SJoel Becker lockres->l_pending_gen++; 890de551246SJoel Becker 891de551246SJoel Becker /* 892de551246SJoel Becker * The downconvert thread may have skipped us because we 893de551246SJoel Becker * were PENDING. Wake it up. 
894de551246SJoel Becker */ 895de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 896de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 897de551246SJoel Becker } 898de551246SJoel Becker 899de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 900de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 901de551246SJoel Becker unsigned int generation, 902de551246SJoel Becker struct ocfs2_super *osb) 903de551246SJoel Becker { 904de551246SJoel Becker unsigned long flags; 905de551246SJoel Becker 906de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 907de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 908de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 909de551246SJoel Becker } 910de551246SJoel Becker 911de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 912de551246SJoel Becker { 913de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 914de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 915de551246SJoel Becker 916de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 917de551246SJoel Becker 918de551246SJoel Becker return lockres->l_pending_gen; 919de551246SJoel Becker } 920de551246SJoel Becker 921de551246SJoel Becker 922aa2623adSMark Fasheh static void ocfs2_blocking_ast(void *opaque, int level) 923ccd979bdSMark Fasheh { 924aa2623adSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 925aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 926ccd979bdSMark Fasheh int needs_downconvert; 927ccd979bdSMark Fasheh unsigned long flags; 928ccd979bdSMark Fasheh 929bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 930ccd979bdSMark Fasheh 931aa2623adSMark Fasheh mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 932aa2623adSMark Fasheh lockres->l_name, level, lockres->l_level, 933aa2623adSMark Fasheh 
ocfs2_lock_type_string(lockres->l_type)); 934aa2623adSMark Fasheh 935cf8e06f1SMark Fasheh /* 936cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 937cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 938cf8e06f1SMark Fasheh */ 939cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 940cf8e06f1SMark Fasheh return; 941cf8e06f1SMark Fasheh 942ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 943ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 944ccd979bdSMark Fasheh if (needs_downconvert) 945ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 946ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 947ccd979bdSMark Fasheh 948d680efe9SMark Fasheh wake_up(&lockres->l_event); 949d680efe9SMark Fasheh 95034d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 951ccd979bdSMark Fasheh } 952ccd979bdSMark Fasheh 953e92d57dfSMark Fasheh static void ocfs2_locking_ast(void *opaque) 954ccd979bdSMark Fasheh { 955e92d57dfSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 956de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 957ccd979bdSMark Fasheh unsigned long flags; 9581693a5c0SDavid Teigland int status; 959ccd979bdSMark Fasheh 960ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 961ccd979bdSMark Fasheh 9621693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 9631693a5c0SDavid Teigland 9641693a5c0SDavid Teigland if (status == -EAGAIN) { 9651693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 9661693a5c0SDavid Teigland goto out; 9671693a5c0SDavid Teigland } 9681693a5c0SDavid Teigland 9691693a5c0SDavid Teigland if (status) { 9708f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 9711693a5c0SDavid Teigland lockres->l_name, status); 972ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 973ccd979bdSMark Fasheh 
return; 974ccd979bdSMark Fasheh } 975ccd979bdSMark Fasheh 976ccd979bdSMark Fasheh switch(lockres->l_action) { 977ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 978ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 979e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 980ccd979bdSMark Fasheh break; 981ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 982ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 983ccd979bdSMark Fasheh break; 984ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 985ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 986ccd979bdSMark Fasheh break; 987ccd979bdSMark Fasheh default: 988e92d57dfSMark Fasheh mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 989e92d57dfSMark Fasheh "lockres flags = 0x%lx, unlock action: %u\n", 990e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 991e92d57dfSMark Fasheh lockres->l_unlock_action); 992ccd979bdSMark Fasheh BUG(); 993ccd979bdSMark Fasheh } 9941693a5c0SDavid Teigland out: 995ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 996ccd979bdSMark Fasheh * can catch it. */ 997ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 998ccd979bdSMark Fasheh 999de551246SJoel Becker /* Did we try to cancel this lock? Clear that state */ 1000de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1001de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1002de551246SJoel Becker 1003de551246SJoel Becker /* 1004de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1005de551246SJoel Becker * know that dlm_lock() has been called :-) 1006de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1007de551246SJoel Becker * can use lockres->l_pending_gen. 
1008de551246SJoel Becker */ 1009de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1010de551246SJoel Becker 1011ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1012d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1013ccd979bdSMark Fasheh } 1014ccd979bdSMark Fasheh 1015ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1016ccd979bdSMark Fasheh int convert) 1017ccd979bdSMark Fasheh { 1018ccd979bdSMark Fasheh unsigned long flags; 1019ccd979bdSMark Fasheh 1020ccd979bdSMark Fasheh mlog_entry_void(); 1021ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1022ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1023ccd979bdSMark Fasheh if (convert) 1024ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1025ccd979bdSMark Fasheh else 1026ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1027ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1028ccd979bdSMark Fasheh 1029ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1030ccd979bdSMark Fasheh mlog_exit_void(); 1031ccd979bdSMark Fasheh } 1032ccd979bdSMark Fasheh 1033ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1034ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1035ccd979bdSMark Fasheh * to do the right thing in that case. 
1036ccd979bdSMark Fasheh */ 1037ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1038ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1039ccd979bdSMark Fasheh int level, 1040bd3e7610SJoel Becker u32 dlm_flags) 1041ccd979bdSMark Fasheh { 1042ccd979bdSMark Fasheh int ret = 0; 1043ccd979bdSMark Fasheh unsigned long flags; 1044de551246SJoel Becker unsigned int gen; 1045ccd979bdSMark Fasheh 1046ccd979bdSMark Fasheh mlog_entry_void(); 1047ccd979bdSMark Fasheh 1048bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1049ccd979bdSMark Fasheh dlm_flags); 1050ccd979bdSMark Fasheh 1051ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1052ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1053ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1054ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1055ccd979bdSMark Fasheh goto bail; 1056ccd979bdSMark Fasheh } 1057ccd979bdSMark Fasheh 1058ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1059ccd979bdSMark Fasheh lockres->l_requested = level; 1060ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1061de551246SJoel Becker gen = lockres_set_pending(lockres); 1062ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1063ccd979bdSMark Fasheh 10644670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1065ccd979bdSMark Fasheh level, 1066ccd979bdSMark Fasheh &lockres->l_lksb, 1067ccd979bdSMark Fasheh dlm_flags, 1068ccd979bdSMark Fasheh lockres->l_name, 1069f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 107024ef1815SJoel Becker lockres); 1071de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 10727431cd7eSJoel Becker if (ret) { 10737431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1074ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1075ccd979bdSMark Fasheh } 1076ccd979bdSMark Fasheh 10777431cd7eSJoel Becker mlog(0, "lock 
%s, return from ocfs2_dlm_lock\n", lockres->l_name); 1078ccd979bdSMark Fasheh 1079ccd979bdSMark Fasheh bail: 1080ccd979bdSMark Fasheh mlog_exit(ret); 1081ccd979bdSMark Fasheh return ret; 1082ccd979bdSMark Fasheh } 1083ccd979bdSMark Fasheh 1084ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1085ccd979bdSMark Fasheh int flag) 1086ccd979bdSMark Fasheh { 1087ccd979bdSMark Fasheh unsigned long flags; 1088ccd979bdSMark Fasheh int ret; 1089ccd979bdSMark Fasheh 1090ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1091ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1092ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1093ccd979bdSMark Fasheh 1094ccd979bdSMark Fasheh return ret; 1095ccd979bdSMark Fasheh } 1096ccd979bdSMark Fasheh 1097ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1098ccd979bdSMark Fasheh 1099ccd979bdSMark Fasheh { 1100ccd979bdSMark Fasheh wait_event(lockres->l_event, 1101ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1102ccd979bdSMark Fasheh } 1103ccd979bdSMark Fasheh 1104ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1105ccd979bdSMark Fasheh 1106ccd979bdSMark Fasheh { 1107ccd979bdSMark Fasheh wait_event(lockres->l_event, 1108ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1109ccd979bdSMark Fasheh } 1110ccd979bdSMark Fasheh 1111ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1112ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1113ccd979bdSMark Fasheh * level will be compatible with it. 
*/ 1114ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1115ccd979bdSMark Fasheh int wanted) 1116ccd979bdSMark Fasheh { 1117ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1118ccd979bdSMark Fasheh 1119ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1120ccd979bdSMark Fasheh } 1121ccd979bdSMark Fasheh 1122ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1123ccd979bdSMark Fasheh { 1124ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1125ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 11268ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1127ccd979bdSMark Fasheh } 1128ccd979bdSMark Fasheh 1129ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1130ccd979bdSMark Fasheh { 1131ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1132ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1133ccd979bdSMark Fasheh INIT_COMPLETION(mw->mw_complete); 1134ccd979bdSMark Fasheh return mw->mw_status; 1135ccd979bdSMark Fasheh } 1136ccd979bdSMark Fasheh 1137ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1138ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1139ccd979bdSMark Fasheh unsigned long mask, 1140ccd979bdSMark Fasheh unsigned long goal) 1141ccd979bdSMark Fasheh { 1142ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1143ccd979bdSMark Fasheh 1144ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1145ccd979bdSMark Fasheh 1146ccd979bdSMark Fasheh list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1147ccd979bdSMark Fasheh mw->mw_mask = mask; 1148ccd979bdSMark Fasheh mw->mw_goal = goal; 1149ccd979bdSMark Fasheh } 1150ccd979bdSMark Fasheh 1151ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1152ccd979bdSMark Fasheh * if the mask still hadn't 
reached its goal */ 1153ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1154ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1155ccd979bdSMark Fasheh { 1156ccd979bdSMark Fasheh unsigned long flags; 1157ccd979bdSMark Fasheh int ret = 0; 1158ccd979bdSMark Fasheh 1159ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1160ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1161ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1162ccd979bdSMark Fasheh ret = -EBUSY; 1163ccd979bdSMark Fasheh 1164ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1165ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1166ccd979bdSMark Fasheh } 1167ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1168ccd979bdSMark Fasheh 1169ccd979bdSMark Fasheh return ret; 1170ccd979bdSMark Fasheh 1171ccd979bdSMark Fasheh } 1172ccd979bdSMark Fasheh 1173cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1174cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1175cf8e06f1SMark Fasheh { 1176cf8e06f1SMark Fasheh int ret; 1177cf8e06f1SMark Fasheh 1178cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1179cf8e06f1SMark Fasheh if (ret) 1180cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1181cf8e06f1SMark Fasheh else 1182cf8e06f1SMark Fasheh ret = mw->mw_status; 1183cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1184cf8e06f1SMark Fasheh INIT_COMPLETION(mw->mw_complete); 1185cf8e06f1SMark Fasheh return ret; 1186cf8e06f1SMark Fasheh } 1187cf8e06f1SMark Fasheh 1188ccd979bdSMark Fasheh static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1189ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1190ccd979bdSMark Fasheh int level, 1191bd3e7610SJoel Becker u32 lkm_flags, 1192ccd979bdSMark Fasheh int arg_flags) 1193ccd979bdSMark Fasheh { 1194ccd979bdSMark Fasheh struct ocfs2_mask_waiter 
mw; 1195ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1196ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1197ccd979bdSMark Fasheh unsigned long flags; 1198de551246SJoel Becker unsigned int gen; 11991693a5c0SDavid Teigland int noqueue_attempted = 0; 1200ccd979bdSMark Fasheh 1201ccd979bdSMark Fasheh mlog_entry_void(); 1202ccd979bdSMark Fasheh 1203ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1204ccd979bdSMark Fasheh 1205b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1206bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1207b80fc012SMark Fasheh 1208ccd979bdSMark Fasheh again: 1209ccd979bdSMark Fasheh wait = 0; 1210ccd979bdSMark Fasheh 1211ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1212ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1213ccd979bdSMark Fasheh goto out; 1214ccd979bdSMark Fasheh } 1215ccd979bdSMark Fasheh 1216ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1217ccd979bdSMark Fasheh 1218ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1219ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1220ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1221ccd979bdSMark Fasheh 1222ccd979bdSMark Fasheh /* We only compare against the currently granted level 1223ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1224ccd979bdSMark Fasheh * we'll get caught below. */ 1225ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1226ccd979bdSMark Fasheh level > lockres->l_level) { 1227ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1228ccd979bdSMark Fasheh * them. 
*/ 1229ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1230ccd979bdSMark Fasheh wait = 1; 1231ccd979bdSMark Fasheh goto unlock; 1232ccd979bdSMark Fasheh } 1233ccd979bdSMark Fasheh 1234ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1235ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1236ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1237ccd979bdSMark Fasheh * another node */ 1238ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1239ccd979bdSMark Fasheh wait = 1; 1240ccd979bdSMark Fasheh goto unlock; 1241ccd979bdSMark Fasheh } 1242ccd979bdSMark Fasheh 1243ccd979bdSMark Fasheh if (level > lockres->l_level) { 12441693a5c0SDavid Teigland if (noqueue_attempted > 0) { 12451693a5c0SDavid Teigland ret = -EAGAIN; 12461693a5c0SDavid Teigland goto unlock; 12471693a5c0SDavid Teigland } 12481693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 12491693a5c0SDavid Teigland noqueue_attempted = 1; 12501693a5c0SDavid Teigland 1251ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1252ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1253ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1254ccd979bdSMark Fasheh 1255019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1256019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1257bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1258019d1b22SMark Fasheh } else { 1259ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1260bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1261019d1b22SMark Fasheh } 1262019d1b22SMark Fasheh 1263ccd979bdSMark Fasheh lockres->l_requested = level; 1264ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1265de551246SJoel Becker gen = lockres_set_pending(lockres); 1266ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1267ccd979bdSMark Fasheh 1268bd3e7610SJoel Becker 
BUG_ON(level == DLM_LOCK_IV); 1269bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1270ccd979bdSMark Fasheh 1271ccd979bdSMark Fasheh mlog(0, "lock %s, convert from %d to level = %d\n", 1272ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1273ccd979bdSMark Fasheh 1274ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 12754670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1276ccd979bdSMark Fasheh level, 1277ccd979bdSMark Fasheh &lockres->l_lksb, 1278019d1b22SMark Fasheh lkm_flags, 1279ccd979bdSMark Fasheh lockres->l_name, 1280f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 128124ef1815SJoel Becker lockres); 1282de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 12837431cd7eSJoel Becker if (ret) { 12847431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 12857431cd7eSJoel Becker (ret != -EAGAIN)) { 128624ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 12877431cd7eSJoel Becker ret, lockres); 1288ccd979bdSMark Fasheh } 1289ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1290ccd979bdSMark Fasheh goto out; 1291ccd979bdSMark Fasheh } 1292ccd979bdSMark Fasheh 129324ef1815SJoel Becker mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", 1294ccd979bdSMark Fasheh lockres->l_name); 1295ccd979bdSMark Fasheh 1296ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1297ccd979bdSMark Fasheh * complete our work regardless. */ 1298ccd979bdSMark Fasheh catch_signals = 0; 1299ccd979bdSMark Fasheh 1300ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1301ccd979bdSMark Fasheh goto again; 1302ccd979bdSMark Fasheh } 1303ccd979bdSMark Fasheh 1304ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. 
*/ 1305ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1306ccd979bdSMark Fasheh 1307ccd979bdSMark Fasheh ret = 0; 1308ccd979bdSMark Fasheh unlock: 1309ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1310ccd979bdSMark Fasheh out: 1311ccd979bdSMark Fasheh /* 1312ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1313ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1314ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1315ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1316ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1317ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 1318ccd979bdSMark Fasheh */ 1319ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1320ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1321ccd979bdSMark Fasheh wait = 0; 1322ccd979bdSMark Fasheh if (lockres_remove_mask_waiter(lockres, &mw)) 1323ccd979bdSMark Fasheh ret = -EAGAIN; 1324ccd979bdSMark Fasheh else 1325ccd979bdSMark Fasheh goto again; 1326ccd979bdSMark Fasheh } 1327ccd979bdSMark Fasheh if (wait) { 1328ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1329ccd979bdSMark Fasheh if (ret == 0) 1330ccd979bdSMark Fasheh goto again; 1331ccd979bdSMark Fasheh mlog_errno(ret); 1332ccd979bdSMark Fasheh } 13338ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1334ccd979bdSMark Fasheh 1335ccd979bdSMark Fasheh mlog_exit(ret); 1336ccd979bdSMark Fasheh return ret; 1337ccd979bdSMark Fasheh } 1338ccd979bdSMark Fasheh 1339ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1340ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1341ccd979bdSMark Fasheh int level) 1342ccd979bdSMark Fasheh { 1343ccd979bdSMark Fasheh unsigned long flags; 1344ccd979bdSMark Fasheh 
1345ccd979bdSMark Fasheh mlog_entry_void(); 1346ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1347ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 134834d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1349ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1350ccd979bdSMark Fasheh mlog_exit_void(); 1351ccd979bdSMark Fasheh } 1352ccd979bdSMark Fasheh 1353da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1354d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 135524c19ef4SMark Fasheh int ex, 135624c19ef4SMark Fasheh int local) 1357ccd979bdSMark Fasheh { 1358bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1359ccd979bdSMark Fasheh unsigned long flags; 1360bd3e7610SJoel Becker u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1361ccd979bdSMark Fasheh 1362ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1363ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1364ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1365ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1366ccd979bdSMark Fasheh 136724c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1368ccd979bdSMark Fasheh } 1369ccd979bdSMark Fasheh 1370ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1371ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1372ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1373ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1374ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1375ccd979bdSMark Fasheh * with creating a new lock resource. 
*/ 1376ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1377ccd979bdSMark Fasheh { 1378ccd979bdSMark Fasheh int ret; 1379d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1380ccd979bdSMark Fasheh 1381ccd979bdSMark Fasheh BUG_ON(!inode); 1382ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1383ccd979bdSMark Fasheh 1384ccd979bdSMark Fasheh mlog_entry_void(); 1385ccd979bdSMark Fasheh 1386b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1387ccd979bdSMark Fasheh 1388ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1389ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1390ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1391ccd979bdSMark Fasheh * about yet, there is no need to. As far as the LVB handling 1392ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1393ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1394ccd979bdSMark Fasheh * valid when we release the EX. */ 1395ccd979bdSMark Fasheh 139624c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1397ccd979bdSMark Fasheh if (ret) { 1398ccd979bdSMark Fasheh mlog_errno(ret); 1399ccd979bdSMark Fasheh goto bail; 1400ccd979bdSMark Fasheh } 1401ccd979bdSMark Fasheh 140224c19ef4SMark Fasheh /* 1403bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 140424c19ef4SMark Fasheh * don't use a generation in their lock names. 
140524c19ef4SMark Fasheh */ 1406e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1407ccd979bdSMark Fasheh if (ret) { 1408ccd979bdSMark Fasheh mlog_errno(ret); 1409ccd979bdSMark Fasheh goto bail; 1410ccd979bdSMark Fasheh } 1411ccd979bdSMark Fasheh 141250008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 141350008630STiger Yang if (ret) { 141450008630STiger Yang mlog_errno(ret); 141550008630STiger Yang goto bail; 141650008630STiger Yang } 141750008630STiger Yang 1418ccd979bdSMark Fasheh bail: 1419ccd979bdSMark Fasheh mlog_exit(ret); 1420ccd979bdSMark Fasheh return ret; 1421ccd979bdSMark Fasheh } 1422ccd979bdSMark Fasheh 1423ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1424ccd979bdSMark Fasheh { 1425ccd979bdSMark Fasheh int status, level; 1426ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1427c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1428ccd979bdSMark Fasheh 1429ccd979bdSMark Fasheh BUG_ON(!inode); 1430ccd979bdSMark Fasheh 1431ccd979bdSMark Fasheh mlog_entry_void(); 1432ccd979bdSMark Fasheh 1433b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1434b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1435ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1436ccd979bdSMark Fasheh 1437c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1438c271c5c2SSunil Mushran return 0; 1439c271c5c2SSunil Mushran 1440ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1441ccd979bdSMark Fasheh 1442bd3e7610SJoel Becker level = write ? 
DLM_LOCK_EX : DLM_LOCK_PR; 1443ccd979bdSMark Fasheh 1444ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1445ccd979bdSMark Fasheh 0); 1446ccd979bdSMark Fasheh if (status < 0) 1447ccd979bdSMark Fasheh mlog_errno(status); 1448ccd979bdSMark Fasheh 1449ccd979bdSMark Fasheh mlog_exit(status); 1450ccd979bdSMark Fasheh return status; 1451ccd979bdSMark Fasheh } 1452ccd979bdSMark Fasheh 1453ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1454ccd979bdSMark Fasheh { 1455bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1456ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1457c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1458ccd979bdSMark Fasheh 1459ccd979bdSMark Fasheh mlog_entry_void(); 1460ccd979bdSMark Fasheh 1461b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1462b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1463ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1464ccd979bdSMark Fasheh 1465c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1466ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1467ccd979bdSMark Fasheh 1468ccd979bdSMark Fasheh mlog_exit_void(); 1469ccd979bdSMark Fasheh } 1470ccd979bdSMark Fasheh 147150008630STiger Yang /* 147250008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
147350008630STiger Yang */ 147450008630STiger Yang int ocfs2_open_lock(struct inode *inode) 147550008630STiger Yang { 147650008630STiger Yang int status = 0; 147750008630STiger Yang struct ocfs2_lock_res *lockres; 147850008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 147950008630STiger Yang 148050008630STiger Yang BUG_ON(!inode); 148150008630STiger Yang 148250008630STiger Yang mlog_entry_void(); 148350008630STiger Yang 148450008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 148550008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 148650008630STiger Yang 148750008630STiger Yang if (ocfs2_mount_local(osb)) 148850008630STiger Yang goto out; 148950008630STiger Yang 149050008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 149150008630STiger Yang 149250008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1493bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 149450008630STiger Yang if (status < 0) 149550008630STiger Yang mlog_errno(status); 149650008630STiger Yang 149750008630STiger Yang out: 149850008630STiger Yang mlog_exit(status); 149950008630STiger Yang return status; 150050008630STiger Yang } 150150008630STiger Yang 150250008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 150350008630STiger Yang { 150450008630STiger Yang int status = 0, level; 150550008630STiger Yang struct ocfs2_lock_res *lockres; 150650008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 150750008630STiger Yang 150850008630STiger Yang BUG_ON(!inode); 150950008630STiger Yang 151050008630STiger Yang mlog_entry_void(); 151150008630STiger Yang 151250008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 151350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 151450008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 151550008630STiger Yang 151650008630STiger Yang if (ocfs2_mount_local(osb)) 151750008630STiger Yang goto out; 151850008630STiger Yang 151950008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 152050008630STiger Yang 1521bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 152250008630STiger Yang 152350008630STiger Yang /* 152450008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1525bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 152650008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 152750008630STiger Yang * this inode is still in use. 152850008630STiger Yang */ 152950008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1530bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 153150008630STiger Yang 153250008630STiger Yang out: 153350008630STiger Yang mlog_exit(status); 153450008630STiger Yang return status; 153550008630STiger Yang } 153650008630STiger Yang 153750008630STiger Yang /* 153850008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
153950008630STiger Yang */ 154050008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 154150008630STiger Yang { 154250008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 154350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 154450008630STiger Yang 154550008630STiger Yang mlog_entry_void(); 154650008630STiger Yang 154750008630STiger Yang mlog(0, "inode %llu drop open lock\n", 154850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 154950008630STiger Yang 155050008630STiger Yang if (ocfs2_mount_local(osb)) 155150008630STiger Yang goto out; 155250008630STiger Yang 155350008630STiger Yang if(lockres->l_ro_holders) 155450008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1555bd3e7610SJoel Becker DLM_LOCK_PR); 155650008630STiger Yang if(lockres->l_ex_holders) 155750008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1558bd3e7610SJoel Becker DLM_LOCK_EX); 155950008630STiger Yang 156050008630STiger Yang out: 156150008630STiger Yang mlog_exit_void(); 156250008630STiger Yang } 156350008630STiger Yang 1564cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1565cf8e06f1SMark Fasheh int level) 1566cf8e06f1SMark Fasheh { 1567cf8e06f1SMark Fasheh int ret; 1568cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1569cf8e06f1SMark Fasheh unsigned long flags; 1570cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1571cf8e06f1SMark Fasheh 1572cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1573cf8e06f1SMark Fasheh 1574cf8e06f1SMark Fasheh retry_cancel: 1575cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1576cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1577cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1578cf8e06f1SMark Fasheh if (ret) { 1579cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1580cf8e06f1SMark Fasheh ret = 
ocfs2_cancel_convert(osb, lockres); 1581cf8e06f1SMark Fasheh if (ret < 0) { 1582cf8e06f1SMark Fasheh mlog_errno(ret); 1583cf8e06f1SMark Fasheh goto out; 1584cf8e06f1SMark Fasheh } 1585cf8e06f1SMark Fasheh goto retry_cancel; 1586cf8e06f1SMark Fasheh } 1587cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1588cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1589cf8e06f1SMark Fasheh 1590cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1591cf8e06f1SMark Fasheh goto retry_cancel; 1592cf8e06f1SMark Fasheh } 1593cf8e06f1SMark Fasheh 1594cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1595cf8e06f1SMark Fasheh /* 1596cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1597cf8e06f1SMark Fasheh * point to restarting the syscall. 1598cf8e06f1SMark Fasheh */ 1599cf8e06f1SMark Fasheh if (lockres->l_level == level) 1600cf8e06f1SMark Fasheh ret = 0; 1601cf8e06f1SMark Fasheh 1602cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1603cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1604cf8e06f1SMark Fasheh 1605cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1606cf8e06f1SMark Fasheh 1607cf8e06f1SMark Fasheh out: 1608cf8e06f1SMark Fasheh return ret; 1609cf8e06f1SMark Fasheh } 1610cf8e06f1SMark Fasheh 1611cf8e06f1SMark Fasheh /* 1612cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1613cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1614cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 1615cf8e06f1SMark Fasheh * seperate path to the "low-level" dlm calls. In particular: 1616cf8e06f1SMark Fasheh * 1617cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1618cf8e06f1SMark Fasheh * what's been requested. 
1619cf8e06f1SMark Fasheh * 1620cf8e06f1SMark Fasheh * - No lock caching is employed. We immediately downconvert to 1621cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1622cf8e06f1SMark Fasheh * the blocking list). 1623cf8e06f1SMark Fasheh * 1624cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1625cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1626cf8e06f1SMark Fasheh * request. 1627cf8e06f1SMark Fasheh * 1628cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1629cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1630cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1631cf8e06f1SMark Fasheh */ 1632cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1633cf8e06f1SMark Fasheh { 1634e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1635e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? 
DLM_LKF_NOQUEUE : 0; 1636cf8e06f1SMark Fasheh unsigned long flags; 1637cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1638cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1639cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1640cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1641cf8e06f1SMark Fasheh 1642cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1643cf8e06f1SMark Fasheh 1644cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1645bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1646cf8e06f1SMark Fasheh mlog(ML_ERROR, 1647cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1648cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1649cf8e06f1SMark Fasheh lockres->l_level); 1650cf8e06f1SMark Fasheh return -EINVAL; 1651cf8e06f1SMark Fasheh } 1652cf8e06f1SMark Fasheh 1653cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1654cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1655cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1656cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1657cf8e06f1SMark Fasheh 1658cf8e06f1SMark Fasheh /* 1659cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1660cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 
1661cf8e06f1SMark Fasheh */ 1662e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1663cf8e06f1SMark Fasheh if (ret < 0) { 1664cf8e06f1SMark Fasheh mlog_errno(ret); 1665cf8e06f1SMark Fasheh goto out; 1666cf8e06f1SMark Fasheh } 1667cf8e06f1SMark Fasheh 1668cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1669cf8e06f1SMark Fasheh if (ret) { 1670cf8e06f1SMark Fasheh mlog_errno(ret); 1671cf8e06f1SMark Fasheh goto out; 1672cf8e06f1SMark Fasheh } 1673cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1674cf8e06f1SMark Fasheh } 1675cf8e06f1SMark Fasheh 1676cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1677e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1678cf8e06f1SMark Fasheh lockres->l_requested = level; 1679cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1680cf8e06f1SMark Fasheh 1681cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1682cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1683cf8e06f1SMark Fasheh 16844670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1685cf8e06f1SMark Fasheh lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 168624ef1815SJoel Becker lockres); 16877431cd7eSJoel Becker if (ret) { 16887431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 168924ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1690cf8e06f1SMark Fasheh ret = -EINVAL; 1691cf8e06f1SMark Fasheh } 1692cf8e06f1SMark Fasheh 1693cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1694cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, &mw); 1695cf8e06f1SMark Fasheh goto out; 1696cf8e06f1SMark Fasheh } 1697cf8e06f1SMark Fasheh 1698cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1699cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1700cf8e06f1SMark Fasheh /* 1701cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1702cf8e06f1SMark Fasheh * flock(). 
Current behavior locally is to allow the 1703cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1704cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1705cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1706cf8e06f1SMark Fasheh * reboot. 1707cf8e06f1SMark Fasheh * 1708cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1709cf8e06f1SMark Fasheh * though. We can't exit this function with an 1710cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1711cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1712cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1713cf8e06f1SMark Fasheh * to just bubble sucess back up to the user. 1714cf8e06f1SMark Fasheh */ 1715cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 17161693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 17171693a5c0SDavid Teigland /* Trylock failed asynchronously */ 17181693a5c0SDavid Teigland BUG_ON(!trylock); 17191693a5c0SDavid Teigland ret = -EAGAIN; 1720cf8e06f1SMark Fasheh } 1721cf8e06f1SMark Fasheh 1722cf8e06f1SMark Fasheh out: 1723cf8e06f1SMark Fasheh 1724cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1725cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1726cf8e06f1SMark Fasheh return ret; 1727cf8e06f1SMark Fasheh } 1728cf8e06f1SMark Fasheh 1729cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1730cf8e06f1SMark Fasheh { 1731cf8e06f1SMark Fasheh int ret; 1732de551246SJoel Becker unsigned int gen; 1733cf8e06f1SMark Fasheh unsigned long flags; 1734cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1735cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1736cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1737cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1738cf8e06f1SMark Fasheh 
1739cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1740cf8e06f1SMark Fasheh 1741cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1742cf8e06f1SMark Fasheh return; 1743cf8e06f1SMark Fasheh 1744e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 1745cf8e06f1SMark Fasheh return; 1746cf8e06f1SMark Fasheh 1747cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1748cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1749cf8e06f1SMark Fasheh lockres->l_action); 1750cf8e06f1SMark Fasheh 1751cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1752cf8e06f1SMark Fasheh /* 1753cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 1754cf8e06f1SMark Fasheh */ 1755cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1756bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 1757cf8e06f1SMark Fasheh 1758e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1759cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1760cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1761cf8e06f1SMark Fasheh 1762e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1763cf8e06f1SMark Fasheh if (ret) { 1764cf8e06f1SMark Fasheh mlog_errno(ret); 1765cf8e06f1SMark Fasheh return; 1766cf8e06f1SMark Fasheh } 1767cf8e06f1SMark Fasheh 1768cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1769cf8e06f1SMark Fasheh if (ret) 1770cf8e06f1SMark Fasheh mlog_errno(ret); 1771cf8e06f1SMark Fasheh } 1772cf8e06f1SMark Fasheh 177334d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1774ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1775ccd979bdSMark Fasheh { 1776ccd979bdSMark Fasheh int kick = 0; 1777ccd979bdSMark Fasheh 1778ccd979bdSMark Fasheh mlog_entry_void(); 1779ccd979bdSMark Fasheh 1780ccd979bdSMark Fasheh /* If we know that another node is waiting on 
our lock, kick 178134d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 1782ccd979bdSMark Fasheh * condition. */ 1783ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1784ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1785bd3e7610SJoel Becker case DLM_LOCK_EX: 1786ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1787ccd979bdSMark Fasheh kick = 1; 1788ccd979bdSMark Fasheh break; 1789bd3e7610SJoel Becker case DLM_LOCK_PR: 1790ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1791ccd979bdSMark Fasheh kick = 1; 1792ccd979bdSMark Fasheh break; 1793ccd979bdSMark Fasheh default: 1794ccd979bdSMark Fasheh BUG(); 1795ccd979bdSMark Fasheh } 1796ccd979bdSMark Fasheh } 1797ccd979bdSMark Fasheh 1798ccd979bdSMark Fasheh if (kick) 179934d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1800ccd979bdSMark Fasheh 1801ccd979bdSMark Fasheh mlog_exit_void(); 1802ccd979bdSMark Fasheh } 1803ccd979bdSMark Fasheh 1804ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 1805ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 1806ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1807ccd979bdSMark Fasheh 1808ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 1809ccd979bdSMark Fasheh * now. */ 1810ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 1811ccd979bdSMark Fasheh { 1812ccd979bdSMark Fasheh u64 res; 1813ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 1814ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 1815ccd979bdSMark Fasheh 1816ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1817ccd979bdSMark Fasheh 1818ccd979bdSMark Fasheh return res; 1819ccd979bdSMark Fasheh } 1820ccd979bdSMark Fasheh 1821ccd979bdSMark Fasheh /* Call this with the lockres locked. 
I am reasonably sure we don't 1822ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 1823e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. */ 1824ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1825ccd979bdSMark Fasheh { 1826ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1827e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1828ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1829ccd979bdSMark Fasheh 1830ccd979bdSMark Fasheh mlog_entry_void(); 1831ccd979bdSMark Fasheh 18328f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1833ccd979bdSMark Fasheh 183424c19ef4SMark Fasheh /* 183524c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 183624c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 183724c19ef4SMark Fasheh * status. 183824c19ef4SMark Fasheh */ 183924c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 184024c19ef4SMark Fasheh lvb->lvb_version = 0; 184124c19ef4SMark Fasheh goto out; 184224c19ef4SMark Fasheh } 184324c19ef4SMark Fasheh 18444d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 1845ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1846ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1847ccd979bdSMark Fasheh lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1848ccd979bdSMark Fasheh lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1849ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1850ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1851ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 1852ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1853ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 1854ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1855ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 
1856ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1857ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 185815b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 1859f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1860ccd979bdSMark Fasheh 186124c19ef4SMark Fasheh out: 1862ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1863ccd979bdSMark Fasheh 1864ccd979bdSMark Fasheh mlog_exit_void(); 1865ccd979bdSMark Fasheh } 1866ccd979bdSMark Fasheh 1867ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 1868ccd979bdSMark Fasheh u64 packed_time) 1869ccd979bdSMark Fasheh { 1870ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1871ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1872ccd979bdSMark Fasheh } 1873ccd979bdSMark Fasheh 1874ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1875ccd979bdSMark Fasheh { 1876ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1877e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1878ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1879ccd979bdSMark Fasheh 1880ccd979bdSMark Fasheh mlog_entry_void(); 1881ccd979bdSMark Fasheh 1882ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1883ccd979bdSMark Fasheh 18848f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1885ccd979bdSMark Fasheh 1886ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 1887ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1888ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1889ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1890ccd979bdSMark Fasheh 1891ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 189215b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1893ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 1894ca4d147eSHerbert Poetzl 1895ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 1896ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1897ccd979bdSMark Fasheh inode->i_blocks = 0; 1898ccd979bdSMark Fasheh else 18998110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 1900ccd979bdSMark Fasheh 1901ccd979bdSMark Fasheh inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1902ccd979bdSMark Fasheh inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1903ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1904ccd979bdSMark Fasheh inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1905ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 1906ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 1907ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 1908ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 1909ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 1910ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 1911ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1912ccd979bdSMark Fasheh 1913ccd979bdSMark Fasheh mlog_exit_void(); 1914ccd979bdSMark Fasheh } 1915ccd979bdSMark Fasheh 1916f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1917f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 1918ccd979bdSMark Fasheh { 19198f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 19208f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1921ccd979bdSMark Fasheh 1922f9e2d82eSMark Fasheh if (lvb->lvb_version 
== OCFS2_LVB_VERSION 1923f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1924ccd979bdSMark Fasheh return 1; 1925ccd979bdSMark Fasheh return 0; 1926ccd979bdSMark Fasheh } 1927ccd979bdSMark Fasheh 1928ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 1929ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 1930ccd979bdSMark Fasheh * 1931ccd979bdSMark Fasheh * 0 means no refresh needed. 1932ccd979bdSMark Fasheh * 1933ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 1934ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 1935ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 1936ccd979bdSMark Fasheh { 1937ccd979bdSMark Fasheh unsigned long flags; 1938ccd979bdSMark Fasheh int status = 0; 1939ccd979bdSMark Fasheh 1940ccd979bdSMark Fasheh mlog_entry_void(); 1941ccd979bdSMark Fasheh 1942ccd979bdSMark Fasheh refresh_check: 1943ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1944ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 1945ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1946ccd979bdSMark Fasheh goto bail; 1947ccd979bdSMark Fasheh } 1948ccd979bdSMark Fasheh 1949ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 1950ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1951ccd979bdSMark Fasheh 1952ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 1953ccd979bdSMark Fasheh goto refresh_check; 1954ccd979bdSMark Fasheh } 1955ccd979bdSMark Fasheh 1956ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 1957ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 1958ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1959ccd979bdSMark Fasheh 1960ccd979bdSMark Fasheh status = 1; 1961ccd979bdSMark Fasheh bail: 1962ccd979bdSMark Fasheh mlog_exit(status); 1963ccd979bdSMark Fasheh return status; 1964ccd979bdSMark Fasheh } 1965ccd979bdSMark Fasheh 1966ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 1967ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 1968ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 1969ccd979bdSMark Fasheh int status) 1970ccd979bdSMark Fasheh { 1971ccd979bdSMark Fasheh unsigned long flags; 1972ccd979bdSMark Fasheh mlog_entry_void(); 1973ccd979bdSMark Fasheh 1974ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1975ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 1976ccd979bdSMark Fasheh if (!status) 1977ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 1978ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1979ccd979bdSMark Fasheh 1980ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1981ccd979bdSMark Fasheh 1982ccd979bdSMark Fasheh mlog_exit_void(); 1983ccd979bdSMark Fasheh } 1984ccd979bdSMark Fasheh 1985ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 1986e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 1987ccd979bdSMark Fasheh struct buffer_head **bh) 1988ccd979bdSMark Fasheh { 1989ccd979bdSMark Fasheh int status = 0; 1990ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1991e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1992ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1993c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1994ccd979bdSMark Fasheh 1995ccd979bdSMark Fasheh mlog_entry_void(); 1996ccd979bdSMark Fasheh 1997be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 1998be9e986bSMark Fasheh goto bail; 1999be9e986bSMark Fasheh 2000ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2001ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2002b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2003ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2004b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2005ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2006ccd979bdSMark Fasheh status = -ENOENT; 2007ccd979bdSMark Fasheh goto bail; 2008ccd979bdSMark Fasheh } 2009ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2010ccd979bdSMark Fasheh 2011ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2012ccd979bdSMark Fasheh goto bail; 2013ccd979bdSMark Fasheh 2014ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2015ccd979bdSMark Fasheh * for the inode metadata. 
*/ 2016ccd979bdSMark Fasheh ocfs2_metadata_cache_purge(inode); 2017ccd979bdSMark Fasheh 201883418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 201983418978SMark Fasheh 2020be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2021b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2022b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2023ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2024ccd979bdSMark Fasheh } else { 2025ccd979bdSMark Fasheh /* Boo, we have to go to disk. */ 2026ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2027b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2028ccd979bdSMark Fasheh if (status < 0) { 2029ccd979bdSMark Fasheh mlog_errno(status); 2030ccd979bdSMark Fasheh goto bail_refresh; 2031ccd979bdSMark Fasheh } 2032ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2033ccd979bdSMark Fasheh 2034ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2035b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2036b657c95cSJoel Becker * already checked that the inode block is sane. 2037ccd979bdSMark Fasheh * 2038ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2039ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2040ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2041ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2042ccd979bdSMark Fasheh * locks associated with it. 
*/ 2043ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2044ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2045b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2046ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2047b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2048b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2049ccd979bdSMark Fasheh inode->i_generation); 2050ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2051ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2052b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2053b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2054b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2055ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2056ccd979bdSMark Fasheh 2057ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 20588ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2059ccd979bdSMark Fasheh } 2060ccd979bdSMark Fasheh 2061ccd979bdSMark Fasheh status = 0; 2062ccd979bdSMark Fasheh bail_refresh: 2063ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2064ccd979bdSMark Fasheh bail: 2065ccd979bdSMark Fasheh mlog_exit(status); 2066ccd979bdSMark Fasheh return status; 2067ccd979bdSMark Fasheh } 2068ccd979bdSMark Fasheh 2069ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2070ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2071ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2072ccd979bdSMark Fasheh { 2073ccd979bdSMark Fasheh int status; 2074ccd979bdSMark Fasheh 2075ccd979bdSMark Fasheh if (passed_bh) { 2076ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2077ccd979bdSMark Fasheh * returned bh. 
*/ 2078ccd979bdSMark Fasheh *ret_bh = passed_bh; 2079ccd979bdSMark Fasheh get_bh(*ret_bh); 2080ccd979bdSMark Fasheh 2081ccd979bdSMark Fasheh return 0; 2082ccd979bdSMark Fasheh } 2083ccd979bdSMark Fasheh 2084b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2085ccd979bdSMark Fasheh if (status < 0) 2086ccd979bdSMark Fasheh mlog_errno(status); 2087ccd979bdSMark Fasheh 2088ccd979bdSMark Fasheh return status; 2089ccd979bdSMark Fasheh } 2090ccd979bdSMark Fasheh 2091ccd979bdSMark Fasheh /* 2092ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2093ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 2094ccd979bdSMark Fasheh */ 2095e63aecb6SMark Fasheh int ocfs2_inode_lock_full(struct inode *inode, 2096ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2097ccd979bdSMark Fasheh int ex, 2098ccd979bdSMark Fasheh int arg_flags) 2099ccd979bdSMark Fasheh { 2100bd3e7610SJoel Becker int status, level, acquired; 2101bd3e7610SJoel Becker u32 dlm_flags; 2102c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2103ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2104ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2105ccd979bdSMark Fasheh 2106ccd979bdSMark Fasheh BUG_ON(!inode); 2107ccd979bdSMark Fasheh 2108ccd979bdSMark Fasheh mlog_entry_void(); 2109ccd979bdSMark Fasheh 2110b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2111b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2112ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2113ccd979bdSMark Fasheh 2114ccd979bdSMark Fasheh status = 0; 2115ccd979bdSMark Fasheh acquired = 0; 2116ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2117ccd979bdSMark Fasheh * rodevices. 
*/ 2118ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2119ccd979bdSMark Fasheh if (ex) 2120ccd979bdSMark Fasheh status = -EROFS; 2121ccd979bdSMark Fasheh goto bail; 2122ccd979bdSMark Fasheh } 2123ccd979bdSMark Fasheh 2124c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2125c271c5c2SSunil Mushran goto local; 2126c271c5c2SSunil Mushran 2127ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2128553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2129ccd979bdSMark Fasheh 2130e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2131bd3e7610SJoel Becker level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2132ccd979bdSMark Fasheh dlm_flags = 0; 2133ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2134bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2135ccd979bdSMark Fasheh 2136ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2137ccd979bdSMark Fasheh if (status < 0) { 2138ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 2139ccd979bdSMark Fasheh mlog_errno(status); 2140ccd979bdSMark Fasheh goto bail; 2141ccd979bdSMark Fasheh } 2142ccd979bdSMark Fasheh 2143ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2144ccd979bdSMark Fasheh acquired = 1; 2145ccd979bdSMark Fasheh 2146ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2147ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2148ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2149ccd979bdSMark Fasheh * abort the operation. */ 2150ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2151553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2152ccd979bdSMark Fasheh 2153c271c5c2SSunil Mushran local: 215424c19ef4SMark Fasheh /* 215524c19ef4SMark Fasheh * We only see this flag if we're being called from 215624c19ef4SMark Fasheh * ocfs2_read_locked_inode(). 
It means we're locking an inode 215724c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 215824c19ef4SMark Fasheh * and let the caller handle it. 215924c19ef4SMark Fasheh */ 216024c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 216124c19ef4SMark Fasheh status = 0; 2162c271c5c2SSunil Mushran if (lockres) 216324c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 216424c19ef4SMark Fasheh goto bail; 216524c19ef4SMark Fasheh } 216624c19ef4SMark Fasheh 2167ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2168e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 2169ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2170ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2171ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2172e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2173ccd979bdSMark Fasheh if (status < 0) { 2174ccd979bdSMark Fasheh if (status != -ENOENT) 2175ccd979bdSMark Fasheh mlog_errno(status); 2176ccd979bdSMark Fasheh goto bail; 2177ccd979bdSMark Fasheh } 2178ccd979bdSMark Fasheh 2179ccd979bdSMark Fasheh if (ret_bh) { 2180ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2181ccd979bdSMark Fasheh if (status < 0) { 2182ccd979bdSMark Fasheh mlog_errno(status); 2183ccd979bdSMark Fasheh goto bail; 2184ccd979bdSMark Fasheh } 2185ccd979bdSMark Fasheh } 2186ccd979bdSMark Fasheh 2187ccd979bdSMark Fasheh bail: 2188ccd979bdSMark Fasheh if (status < 0) { 2189ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2190ccd979bdSMark Fasheh brelse(*ret_bh); 2191ccd979bdSMark Fasheh *ret_bh = NULL; 2192ccd979bdSMark Fasheh } 2193ccd979bdSMark Fasheh if (acquired) 2194e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2195ccd979bdSMark Fasheh } 2196ccd979bdSMark Fasheh 2197ccd979bdSMark Fasheh if (local_bh) 2198ccd979bdSMark Fasheh brelse(local_bh); 
2199ccd979bdSMark Fasheh 2200ccd979bdSMark Fasheh mlog_exit(status); 2201ccd979bdSMark Fasheh return status; 2202ccd979bdSMark Fasheh } 2203ccd979bdSMark Fasheh 2204ccd979bdSMark Fasheh /* 220534d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 220634d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 220734d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 2208ccd979bdSMark Fasheh * 2209ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2210ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2211ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2212ccd979bdSMark Fasheh * 221334d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 221434d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 221534d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 221634d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 221734d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 221834d024f8SMark Fasheh * immediately retry the aop call. 2219ccd979bdSMark Fasheh * 2220ccd979bdSMark Fasheh * We do a blocking lock and immediate unlock before returning, though, so that 2221ccd979bdSMark Fasheh * the lock has a great chance of being cached on this node by the time the VFS 2222ccd979bdSMark Fasheh * calls back to retry the aop. This has a potential to livelock as nodes 2223ccd979bdSMark Fasheh * ping locks back and forth, but that's a risk we're willing to take to avoid 2224ccd979bdSMark Fasheh * the lock inversion simply. 
2225ccd979bdSMark Fasheh */ 2226e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2227ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2228ccd979bdSMark Fasheh int ex, 2229ccd979bdSMark Fasheh struct page *page) 2230ccd979bdSMark Fasheh { 2231ccd979bdSMark Fasheh int ret; 2232ccd979bdSMark Fasheh 2233e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2234ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2235ccd979bdSMark Fasheh unlock_page(page); 2236e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2237e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2238ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2239ccd979bdSMark Fasheh } 2240ccd979bdSMark Fasheh 2241ccd979bdSMark Fasheh return ret; 2242ccd979bdSMark Fasheh } 2243ccd979bdSMark Fasheh 2244e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 22457f1a37e3STiger Yang struct vfsmount *vfsmnt, 22467f1a37e3STiger Yang int *level) 22477f1a37e3STiger Yang { 22487f1a37e3STiger Yang int ret; 22497f1a37e3STiger Yang 22507f1a37e3STiger Yang mlog_entry_void(); 2251e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 22527f1a37e3STiger Yang if (ret < 0) { 22537f1a37e3STiger Yang mlog_errno(ret); 22547f1a37e3STiger Yang return ret; 22557f1a37e3STiger Yang } 22567f1a37e3STiger Yang 22577f1a37e3STiger Yang /* 22587f1a37e3STiger Yang * If we should update atime, we will get EX lock, 22597f1a37e3STiger Yang * otherwise we just get PR lock. 
22607f1a37e3STiger Yang */ 22617f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 22627f1a37e3STiger Yang struct buffer_head *bh = NULL; 22637f1a37e3STiger Yang 2264e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2265e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 22667f1a37e3STiger Yang if (ret < 0) { 22677f1a37e3STiger Yang mlog_errno(ret); 22687f1a37e3STiger Yang return ret; 22697f1a37e3STiger Yang } 22707f1a37e3STiger Yang *level = 1; 22717f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 22727f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 22737f1a37e3STiger Yang if (bh) 22747f1a37e3STiger Yang brelse(bh); 22757f1a37e3STiger Yang } else 22767f1a37e3STiger Yang *level = 0; 22777f1a37e3STiger Yang 22787f1a37e3STiger Yang mlog_exit(ret); 22797f1a37e3STiger Yang return ret; 22807f1a37e3STiger Yang } 22817f1a37e3STiger Yang 2282e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2283ccd979bdSMark Fasheh int ex) 2284ccd979bdSMark Fasheh { 2285bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2286e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2287c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2288ccd979bdSMark Fasheh 2289ccd979bdSMark Fasheh mlog_entry_void(); 2290ccd979bdSMark Fasheh 2291b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2292b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2293ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2294ccd979bdSMark Fasheh 2295c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2296c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2297ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2298ccd979bdSMark Fasheh 2299ccd979bdSMark Fasheh mlog_exit_void(); 2300ccd979bdSMark Fasheh } 2301ccd979bdSMark Fasheh 2302ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2303ccd979bdSMark Fasheh int ex) 2304ccd979bdSMark Fasheh { 2305c271c5c2SSunil Mushran int status = 0; 2306bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2307ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2308ccd979bdSMark Fasheh 2309ccd979bdSMark Fasheh mlog_entry_void(); 2310ccd979bdSMark Fasheh 2311ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2312ccd979bdSMark Fasheh return -EROFS; 2313ccd979bdSMark Fasheh 2314c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2315c271c5c2SSunil Mushran goto bail; 2316c271c5c2SSunil Mushran 2317ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2318ccd979bdSMark Fasheh if (status < 0) { 2319ccd979bdSMark Fasheh mlog_errno(status); 2320ccd979bdSMark Fasheh goto bail; 2321ccd979bdSMark Fasheh } 2322ccd979bdSMark Fasheh 2323ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2324ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2325ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2326ccd979bdSMark Fasheh * everything is up to the caller :) */ 2327ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2328ccd979bdSMark Fasheh if (status < 0) { 2329ccd979bdSMark Fasheh mlog_errno(status); 2330ccd979bdSMark Fasheh goto bail; 2331ccd979bdSMark Fasheh } 2332ccd979bdSMark Fasheh if (status) { 23338e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2334ccd979bdSMark Fasheh 2335ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2336ccd979bdSMark Fasheh 2337ccd979bdSMark Fasheh if (status < 0) 2338ccd979bdSMark Fasheh mlog_errno(status); 23398ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2340ccd979bdSMark Fasheh } 2341ccd979bdSMark Fasheh bail: 2342ccd979bdSMark Fasheh mlog_exit(status); 2343ccd979bdSMark Fasheh return status; 2344ccd979bdSMark Fasheh } 2345ccd979bdSMark Fasheh 2346ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2347ccd979bdSMark Fasheh int ex) 2348ccd979bdSMark Fasheh { 2349bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2350ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2351ccd979bdSMark Fasheh 2352c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2353ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2354ccd979bdSMark Fasheh } 2355ccd979bdSMark Fasheh 2356ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2357ccd979bdSMark Fasheh { 2358ccd979bdSMark Fasheh int status; 2359ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2360ccd979bdSMark Fasheh 2361ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2362ccd979bdSMark Fasheh return -EROFS; 2363ccd979bdSMark Fasheh 2364c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2365c271c5c2SSunil Mushran return 0; 2366c271c5c2SSunil Mushran 2367bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2368ccd979bdSMark Fasheh if (status < 0) 2369ccd979bdSMark Fasheh mlog_errno(status); 2370ccd979bdSMark Fasheh 2371ccd979bdSMark Fasheh return status; 2372ccd979bdSMark Fasheh } 2373ccd979bdSMark Fasheh 2374ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2375ccd979bdSMark Fasheh { 2376ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2377ccd979bdSMark Fasheh 2378c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2379bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2380ccd979bdSMark Fasheh } 2381ccd979bdSMark Fasheh 2382d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2383d680efe9SMark Fasheh { 2384d680efe9SMark Fasheh int ret; 2385bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2386d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2387d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2388d680efe9SMark Fasheh 2389d680efe9SMark Fasheh BUG_ON(!dl); 2390d680efe9SMark Fasheh 2391d680efe9SMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2392d680efe9SMark Fasheh return -EROFS; 2393d680efe9SMark Fasheh 2394c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2395c271c5c2SSunil Mushran return 0; 2396c271c5c2SSunil Mushran 2397d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2398d680efe9SMark Fasheh if (ret < 0) 2399d680efe9SMark Fasheh mlog_errno(ret); 2400d680efe9SMark Fasheh 2401d680efe9SMark Fasheh return ret; 2402d680efe9SMark Fasheh } 2403d680efe9SMark Fasheh 2404d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2405d680efe9SMark Fasheh { 2406bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2407d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2408d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2409d680efe9SMark Fasheh 2410c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2411d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2412d680efe9SMark Fasheh } 2413d680efe9SMark Fasheh 2414ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2415ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2416ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2417ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2418ccd979bdSMark Fasheh { 2419ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2420ccd979bdSMark Fasheh 2421ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2422ccd979bdSMark Fasheh 2423ccd979bdSMark Fasheh kfree(dlm_debug); 2424ccd979bdSMark Fasheh } 2425ccd979bdSMark Fasheh 2426ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2427ccd979bdSMark Fasheh { 2428ccd979bdSMark Fasheh if (dlm_debug) 2429ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2430ccd979bdSMark Fasheh } 2431ccd979bdSMark Fasheh 2432ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2433ccd979bdSMark Fasheh { 2434ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2435ccd979bdSMark Fasheh } 2436ccd979bdSMark Fasheh 2437ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2438ccd979bdSMark Fasheh { 2439ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2440ccd979bdSMark Fasheh 2441ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2442ccd979bdSMark Fasheh if (!dlm_debug) { 2443ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2444ccd979bdSMark Fasheh goto out; 2445ccd979bdSMark Fasheh } 2446ccd979bdSMark Fasheh 2447ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2448ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2449ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2450ccd979bdSMark Fasheh out: 2451ccd979bdSMark Fasheh return dlm_debug; 2452ccd979bdSMark Fasheh } 2453ccd979bdSMark Fasheh 2454ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2455ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2456ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2457ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2458ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2459ccd979bdSMark Fasheh }; 2460ccd979bdSMark Fasheh 2461ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2462ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2463ccd979bdSMark Fasheh { 2464ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2465ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2466ccd979bdSMark Fasheh 2467ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2468ccd979bdSMark Fasheh 2469ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2470ccd979bdSMark Fasheh /* discover the head of the list */ 2471ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2472ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2473ccd979bdSMark Fasheh break; 2474ccd979bdSMark Fasheh } 2475ccd979bdSMark Fasheh 2476ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2477ccd979bdSMark Fasheh * l_ops field. 
*/ 2478ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2479ccd979bdSMark Fasheh ret = iter; 2480ccd979bdSMark Fasheh break; 2481ccd979bdSMark Fasheh } 2482ccd979bdSMark Fasheh } 2483ccd979bdSMark Fasheh 2484ccd979bdSMark Fasheh return ret; 2485ccd979bdSMark Fasheh } 2486ccd979bdSMark Fasheh 2487ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2488ccd979bdSMark Fasheh { 2489ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2490ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2491ccd979bdSMark Fasheh 2492ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2493ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2494ccd979bdSMark Fasheh if (iter) { 2495ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2496ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2497ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2498ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2499ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2500ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2501ccd979bdSMark Fasheh * in them. 
*/ 2502ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2503ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2504ccd979bdSMark Fasheh } 2505ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2506ccd979bdSMark Fasheh 2507ccd979bdSMark Fasheh return iter; 2508ccd979bdSMark Fasheh } 2509ccd979bdSMark Fasheh 2510ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2511ccd979bdSMark Fasheh { 2512ccd979bdSMark Fasheh } 2513ccd979bdSMark Fasheh 2514ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2515ccd979bdSMark Fasheh { 2516ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2517ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2518ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2519ccd979bdSMark Fasheh 2520ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2521ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2522ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2523ccd979bdSMark Fasheh if (iter) { 2524ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2525ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2526ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2527ccd979bdSMark Fasheh } 2528ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2529ccd979bdSMark Fasheh 2530ccd979bdSMark Fasheh return iter; 2531ccd979bdSMark Fasheh } 2532ccd979bdSMark Fasheh 2533ccd979bdSMark Fasheh /* So that debugfs.ocfs2 can determine which format is being used */ 25348ddb7b00SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 2 2535ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2536ccd979bdSMark Fasheh { 2537ccd979bdSMark Fasheh int i; 2538ccd979bdSMark Fasheh char *lvb; 2539ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2540ccd979bdSMark Fasheh 2541ccd979bdSMark Fasheh if (!lockres) 2542ccd979bdSMark Fasheh return -EINVAL; 2543ccd979bdSMark Fasheh 2544d680efe9SMark Fasheh 
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2545d680efe9SMark Fasheh 2546d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2547d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2548d680efe9SMark Fasheh lockres->l_name, 2549d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2550d680efe9SMark Fasheh else 2551d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2552d680efe9SMark Fasheh 2553d680efe9SMark Fasheh seq_printf(m, "%d\t" 2554ccd979bdSMark Fasheh "0x%lx\t" 2555ccd979bdSMark Fasheh "0x%x\t" 2556ccd979bdSMark Fasheh "0x%x\t" 2557ccd979bdSMark Fasheh "%u\t" 2558ccd979bdSMark Fasheh "%u\t" 2559ccd979bdSMark Fasheh "%d\t" 2560ccd979bdSMark Fasheh "%d\t", 2561ccd979bdSMark Fasheh lockres->l_level, 2562ccd979bdSMark Fasheh lockres->l_flags, 2563ccd979bdSMark Fasheh lockres->l_action, 2564ccd979bdSMark Fasheh lockres->l_unlock_action, 2565ccd979bdSMark Fasheh lockres->l_ro_holders, 2566ccd979bdSMark Fasheh lockres->l_ex_holders, 2567ccd979bdSMark Fasheh lockres->l_requested, 2568ccd979bdSMark Fasheh lockres->l_blocking); 2569ccd979bdSMark Fasheh 2570ccd979bdSMark Fasheh /* Dump the raw LVB */ 25718f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2572ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2573ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2574ccd979bdSMark Fasheh 25758ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 25768ddb7b00SSunil Mushran # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 25778ddb7b00SSunil Mushran # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 25788ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 25798ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 25808ddb7b00SSunil Mushran # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 25818ddb7b00SSunil Mushran # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 
25828ddb7b00SSunil Mushran # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 25838ddb7b00SSunil Mushran # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 25848ddb7b00SSunil Mushran # define lock_refresh(_l) (_l)->l_lock_refresh 25858ddb7b00SSunil Mushran #else 2586dd25e55eSRandy Dunlap # define lock_num_prmode(_l) (0ULL) 2587dd25e55eSRandy Dunlap # define lock_num_exmode(_l) (0ULL) 25888ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 25898ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 2590dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 2591dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 25928ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 25938ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 25948ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 25958ddb7b00SSunil Mushran #endif 25968ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 25978ddb7b00SSunil Mushran seq_printf(m, "%llu\t" 25988ddb7b00SSunil Mushran "%llu\t" 25998ddb7b00SSunil Mushran "%u\t" 26008ddb7b00SSunil Mushran "%u\t" 26018ddb7b00SSunil Mushran "%llu\t" 26028ddb7b00SSunil Mushran "%llu\t" 26038ddb7b00SSunil Mushran "%u\t" 26048ddb7b00SSunil Mushran "%u\t" 26058ddb7b00SSunil Mushran "%u\t", 26068ddb7b00SSunil Mushran lock_num_prmode(lockres), 26078ddb7b00SSunil Mushran lock_num_exmode(lockres), 26088ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 26098ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 26108ddb7b00SSunil Mushran lock_total_prmode(lockres), 26118ddb7b00SSunil Mushran lock_total_exmode(lockres), 26128ddb7b00SSunil Mushran lock_max_prmode(lockres), 26138ddb7b00SSunil Mushran lock_max_exmode(lockres), 26148ddb7b00SSunil Mushran lock_refresh(lockres)); 26158ddb7b00SSunil Mushran 2616ccd979bdSMark Fasheh /* End the line */ 2617ccd979bdSMark Fasheh seq_printf(m, "\n"); 2618ccd979bdSMark Fasheh return 0; 2619ccd979bdSMark Fasheh } 2620ccd979bdSMark Fasheh 
262190d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2622ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2623ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2624ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2625ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2626ccd979bdSMark Fasheh }; 2627ccd979bdSMark Fasheh 2628ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2629ccd979bdSMark Fasheh { 2630ccd979bdSMark Fasheh struct seq_file *seq = (struct seq_file *) file->private_data; 2631ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2632ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2633ccd979bdSMark Fasheh 2634ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2635ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2636ccd979bdSMark Fasheh return seq_release_private(inode, file); 2637ccd979bdSMark Fasheh } 2638ccd979bdSMark Fasheh 2639ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2640ccd979bdSMark Fasheh { 2641ccd979bdSMark Fasheh int ret; 2642ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2643ccd979bdSMark Fasheh struct seq_file *seq; 2644ccd979bdSMark Fasheh struct ocfs2_super *osb; 2645ccd979bdSMark Fasheh 2646ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2647ccd979bdSMark Fasheh if (!priv) { 2648ccd979bdSMark Fasheh ret = -ENOMEM; 2649ccd979bdSMark Fasheh mlog_errno(ret); 2650ccd979bdSMark Fasheh goto out; 2651ccd979bdSMark Fasheh } 26528e18e294STheodore Ts'o osb = inode->i_private; 2653ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2654ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2655ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2656ccd979bdSMark Fasheh 2657ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2658ccd979bdSMark Fasheh if (ret) { 2659ccd979bdSMark Fasheh 
kfree(priv); 2660ccd979bdSMark Fasheh mlog_errno(ret); 2661ccd979bdSMark Fasheh goto out; 2662ccd979bdSMark Fasheh } 2663ccd979bdSMark Fasheh 2664ccd979bdSMark Fasheh seq = (struct seq_file *) file->private_data; 2665ccd979bdSMark Fasheh seq->private = priv; 2666ccd979bdSMark Fasheh 2667ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2668ccd979bdSMark Fasheh priv->p_dlm_debug); 2669ccd979bdSMark Fasheh 2670ccd979bdSMark Fasheh out: 2671ccd979bdSMark Fasheh return ret; 2672ccd979bdSMark Fasheh } 2673ccd979bdSMark Fasheh 26744b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2675ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2676ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2677ccd979bdSMark Fasheh .read = seq_read, 2678ccd979bdSMark Fasheh .llseek = seq_lseek, 2679ccd979bdSMark Fasheh }; 2680ccd979bdSMark Fasheh 2681ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2682ccd979bdSMark Fasheh { 2683ccd979bdSMark Fasheh int ret = 0; 2684ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2685ccd979bdSMark Fasheh 2686ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2687ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2688ccd979bdSMark Fasheh osb->osb_debug_root, 2689ccd979bdSMark Fasheh osb, 2690ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2691ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2692ccd979bdSMark Fasheh ret = -EINVAL; 2693ccd979bdSMark Fasheh mlog(ML_ERROR, 2694ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2695ccd979bdSMark Fasheh goto out; 2696ccd979bdSMark Fasheh } 2697ccd979bdSMark Fasheh 2698ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2699ccd979bdSMark Fasheh out: 2700ccd979bdSMark Fasheh return ret; 2701ccd979bdSMark Fasheh } 2702ccd979bdSMark Fasheh 2703ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2704ccd979bdSMark Fasheh { 
2705ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2706ccd979bdSMark Fasheh 2707ccd979bdSMark Fasheh if (dlm_debug) { 2708ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2709ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2710ccd979bdSMark Fasheh } 2711ccd979bdSMark Fasheh } 2712ccd979bdSMark Fasheh 2713ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 2714ccd979bdSMark Fasheh { 2715c271c5c2SSunil Mushran int status = 0; 27164670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2717ccd979bdSMark Fasheh 2718ccd979bdSMark Fasheh mlog_entry_void(); 2719ccd979bdSMark Fasheh 27200abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 27210abd6d18SMark Fasheh osb->node_num = 0; 2722c271c5c2SSunil Mushran goto local; 27230abd6d18SMark Fasheh } 2724c271c5c2SSunil Mushran 2725ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2726ccd979bdSMark Fasheh if (status < 0) { 2727ccd979bdSMark Fasheh mlog_errno(status); 2728ccd979bdSMark Fasheh goto bail; 2729ccd979bdSMark Fasheh } 2730ccd979bdSMark Fasheh 273134d024f8SMark Fasheh /* launch downconvert thread */ 273234d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 273334d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 273434d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 273534d024f8SMark Fasheh osb->dc_task = NULL; 2736ccd979bdSMark Fasheh mlog_errno(status); 2737ccd979bdSMark Fasheh goto bail; 2738ccd979bdSMark Fasheh } 2739ccd979bdSMark Fasheh 2740ccd979bdSMark Fasheh /* for now, uuid == domain */ 27419c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 27429c6c877cSJoel Becker osb->uuid_str, 27434670c46dSJoel Becker strlen(osb->uuid_str), 27444670c46dSJoel Becker ocfs2_do_node_down, osb, 27454670c46dSJoel Becker &conn); 27464670c46dSJoel Becker if (status) { 2747ccd979bdSMark Fasheh mlog_errno(status); 2748ccd979bdSMark Fasheh goto bail; 2749ccd979bdSMark Fasheh } 2750ccd979bdSMark 
Fasheh 27510abd6d18SMark Fasheh status = ocfs2_cluster_this_node(&osb->node_num); 27520abd6d18SMark Fasheh if (status < 0) { 27530abd6d18SMark Fasheh mlog_errno(status); 27540abd6d18SMark Fasheh mlog(ML_ERROR, 27550abd6d18SMark Fasheh "could not find this host's node number\n"); 2756286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 27570abd6d18SMark Fasheh goto bail; 27580abd6d18SMark Fasheh } 27590abd6d18SMark Fasheh 2760c271c5c2SSunil Mushran local: 2761ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2762ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2763ccd979bdSMark Fasheh 27644670c46dSJoel Becker osb->cconn = conn; 2765ccd979bdSMark Fasheh 2766ccd979bdSMark Fasheh status = 0; 2767ccd979bdSMark Fasheh bail: 2768ccd979bdSMark Fasheh if (status < 0) { 2769ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 277034d024f8SMark Fasheh if (osb->dc_task) 277134d024f8SMark Fasheh kthread_stop(osb->dc_task); 2772ccd979bdSMark Fasheh } 2773ccd979bdSMark Fasheh 2774ccd979bdSMark Fasheh mlog_exit(status); 2775ccd979bdSMark Fasheh return status; 2776ccd979bdSMark Fasheh } 2777ccd979bdSMark Fasheh 2778286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 2779286eaa95SJoel Becker int hangup_pending) 2780ccd979bdSMark Fasheh { 2781ccd979bdSMark Fasheh mlog_entry_void(); 2782ccd979bdSMark Fasheh 2783ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 2784ccd979bdSMark Fasheh 27854670c46dSJoel Becker /* 27864670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 27874670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 27884670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
27894670c46dSJoel Becker */ 27904670c46dSJoel Becker 279134d024f8SMark Fasheh if (osb->dc_task) { 279234d024f8SMark Fasheh kthread_stop(osb->dc_task); 279334d024f8SMark Fasheh osb->dc_task = NULL; 2794ccd979bdSMark Fasheh } 2795ccd979bdSMark Fasheh 2796ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 2797ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 2798ccd979bdSMark Fasheh 2799286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 28004670c46dSJoel Becker osb->cconn = NULL; 2801ccd979bdSMark Fasheh 2802ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 2803ccd979bdSMark Fasheh 2804ccd979bdSMark Fasheh mlog_exit_void(); 2805ccd979bdSMark Fasheh } 2806ccd979bdSMark Fasheh 28077431cd7eSJoel Becker static void ocfs2_unlock_ast(void *opaque, int error) 2808ccd979bdSMark Fasheh { 2809ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 2810ccd979bdSMark Fasheh unsigned long flags; 2811ccd979bdSMark Fasheh 2812ccd979bdSMark Fasheh mlog_entry_void(); 2813ccd979bdSMark Fasheh 2814ccd979bdSMark Fasheh mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2815ccd979bdSMark Fasheh lockres->l_unlock_action); 2816ccd979bdSMark Fasheh 2817ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2818de551246SJoel Becker if (error) { 28197431cd7eSJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 28207431cd7eSJoel Becker "unlock_action %d\n", error, lockres->l_name, 2821ccd979bdSMark Fasheh lockres->l_unlock_action); 2822ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2823ccd979bdSMark Fasheh return; 2824ccd979bdSMark Fasheh } 2825ccd979bdSMark Fasheh 2826ccd979bdSMark Fasheh switch(lockres->l_unlock_action) { 2827ccd979bdSMark Fasheh case OCFS2_UNLOCK_CANCEL_CONVERT: 2828ccd979bdSMark Fasheh mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2829ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 2830ccd979bdSMark Fasheh break; 
2831ccd979bdSMark Fasheh case OCFS2_UNLOCK_DROP_LOCK: 2832bd3e7610SJoel Becker lockres->l_level = DLM_LOCK_IV; 2833ccd979bdSMark Fasheh break; 2834ccd979bdSMark Fasheh default: 2835ccd979bdSMark Fasheh BUG(); 2836ccd979bdSMark Fasheh } 2837ccd979bdSMark Fasheh 2838ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2839ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2840ccd979bdSMark Fasheh wake_up(&lockres->l_event); 284107f9eebcSDavid Teigland spin_unlock_irqrestore(&lockres->l_lock, flags); 2842ccd979bdSMark Fasheh 2843ccd979bdSMark Fasheh mlog_exit_void(); 2844ccd979bdSMark Fasheh } 2845ccd979bdSMark Fasheh 2846ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 28470d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 2848ccd979bdSMark Fasheh { 28497431cd7eSJoel Becker int ret; 2850ccd979bdSMark Fasheh unsigned long flags; 2851bd3e7610SJoel Becker u32 lkm_flags = 0; 2852ccd979bdSMark Fasheh 2853ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 2854ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2855ccd979bdSMark Fasheh goto out; 2856ccd979bdSMark Fasheh 2857b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2858bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 2859b80fc012SMark Fasheh 2860ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2861ccd979bdSMark Fasheh 2862ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2863ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 2864ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2865ccd979bdSMark Fasheh 2866ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 2867ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 2868ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 2869ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 2870ccd979bdSMark Fasheh lockres->l_unlock_action); 2871ccd979bdSMark Fasheh 2872ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2873ccd979bdSMark Fasheh 2874ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 2875ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 2876ccd979bdSMark Fasheh * future? 
*/ 2877ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2878ccd979bdSMark Fasheh 2879ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2880ccd979bdSMark Fasheh } 2881ccd979bdSMark Fasheh 28820d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 28830d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 2884bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 28850d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 28860d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 28870d5dc6c2SMark Fasheh } 2888ccd979bdSMark Fasheh 2889ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 2890ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2891ccd979bdSMark Fasheh lockres->l_name); 2892ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2893ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 2894ccd979bdSMark Fasheh 2895ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 2896ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2897ccd979bdSMark Fasheh goto out; 2898ccd979bdSMark Fasheh } 2899ccd979bdSMark Fasheh 2900ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 2901ccd979bdSMark Fasheh 2902ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 2903ccd979bdSMark Fasheh * fire. */ 2904ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 2905ccd979bdSMark Fasheh 2906ccd979bdSMark Fasheh /* is this necessary? 
*/ 2907ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2908ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 2909ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2910ccd979bdSMark Fasheh 2911ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2912ccd979bdSMark Fasheh 29134670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 291424ef1815SJoel Becker lockres); 29157431cd7eSJoel Becker if (ret) { 29167431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 2917ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2918cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 2919ccd979bdSMark Fasheh BUG(); 2920ccd979bdSMark Fasheh } 292124ef1815SJoel Becker mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", 2922ccd979bdSMark Fasheh lockres->l_name); 2923ccd979bdSMark Fasheh 2924ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2925ccd979bdSMark Fasheh out: 2926ccd979bdSMark Fasheh mlog_exit(0); 2927ccd979bdSMark Fasheh return 0; 2928ccd979bdSMark Fasheh } 2929ccd979bdSMark Fasheh 2930ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 2931ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 293234d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 2933ccd979bdSMark Fasheh * it safe to drop. 2934ccd979bdSMark Fasheh * 2935ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 2936ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 2937ccd979bdSMark Fasheh { 2938ccd979bdSMark Fasheh int status; 2939ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 2940ccd979bdSMark Fasheh unsigned long flags; 2941ccd979bdSMark Fasheh 2942ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 2943ccd979bdSMark Fasheh 2944ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2945ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 2946ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 2947ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 2948ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2949ccd979bdSMark Fasheh 2950ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 2951ccd979bdSMark Fasheh 2952ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 2953ccd979bdSMark Fasheh if (status) 2954ccd979bdSMark Fasheh mlog_errno(status); 2955ccd979bdSMark Fasheh 2956ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2957ccd979bdSMark Fasheh } 2958ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2959ccd979bdSMark Fasheh } 2960ccd979bdSMark Fasheh 2961d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 2962d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 2963d680efe9SMark Fasheh { 2964d680efe9SMark Fasheh int ret; 2965d680efe9SMark Fasheh 2966d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 29670d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 2968d680efe9SMark Fasheh if (ret) 2969d680efe9SMark Fasheh mlog_errno(ret); 2970d680efe9SMark Fasheh } 2971d680efe9SMark Fasheh 2972ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2973ccd979bdSMark Fasheh { 2974d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 2975d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
2976ccd979bdSMark Fasheh } 2977ccd979bdSMark Fasheh 2978ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 2979ccd979bdSMark Fasheh { 2980ccd979bdSMark Fasheh int status, err; 2981ccd979bdSMark Fasheh 2982ccd979bdSMark Fasheh mlog_entry_void(); 2983ccd979bdSMark Fasheh 2984ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 2985ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 2986ccd979bdSMark Fasheh 2987ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 298850008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 2989ccd979bdSMark Fasheh if (err < 0) 2990ccd979bdSMark Fasheh mlog_errno(err); 2991ccd979bdSMark Fasheh 2992ccd979bdSMark Fasheh status = err; 2993ccd979bdSMark Fasheh 2994ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2995e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 2996ccd979bdSMark Fasheh if (err < 0) 2997ccd979bdSMark Fasheh mlog_errno(err); 2998ccd979bdSMark Fasheh if (err < 0 && !status) 2999ccd979bdSMark Fasheh status = err; 3000ccd979bdSMark Fasheh 3001ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 30020d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3003ccd979bdSMark Fasheh if (err < 0) 3004ccd979bdSMark Fasheh mlog_errno(err); 3005ccd979bdSMark Fasheh if (err < 0 && !status) 3006ccd979bdSMark Fasheh status = err; 3007ccd979bdSMark Fasheh 3008ccd979bdSMark Fasheh mlog_exit(status); 3009ccd979bdSMark Fasheh return status; 3010ccd979bdSMark Fasheh } 3011ccd979bdSMark Fasheh 3012de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3013ccd979bdSMark Fasheh int new_level) 3014ccd979bdSMark Fasheh { 3015ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3016ccd979bdSMark Fasheh 3017bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3018ccd979bdSMark Fasheh 3019ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 3020bd3e7610SJoel Becker mlog(ML_ERROR, 
"lockres->l_level (%d) <= new_level (%d)\n", 3021ccd979bdSMark Fasheh lockres->l_level, new_level); 3022ccd979bdSMark Fasheh BUG(); 3023ccd979bdSMark Fasheh } 3024ccd979bdSMark Fasheh 3025ccd979bdSMark Fasheh mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3026ccd979bdSMark Fasheh lockres->l_name, new_level, lockres->l_blocking); 3027ccd979bdSMark Fasheh 3028ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3029ccd979bdSMark Fasheh lockres->l_requested = new_level; 3030ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3031de551246SJoel Becker return lockres_set_pending(lockres); 3032ccd979bdSMark Fasheh } 3033ccd979bdSMark Fasheh 3034ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3035ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3036ccd979bdSMark Fasheh int new_level, 3037de551246SJoel Becker int lvb, 3038de551246SJoel Becker unsigned int generation) 3039ccd979bdSMark Fasheh { 3040bd3e7610SJoel Becker int ret; 3041bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3042ccd979bdSMark Fasheh 3043ccd979bdSMark Fasheh mlog_entry_void(); 3044ccd979bdSMark Fasheh 3045ccd979bdSMark Fasheh if (lvb) 3046bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3047ccd979bdSMark Fasheh 30484670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3049ccd979bdSMark Fasheh new_level, 3050ccd979bdSMark Fasheh &lockres->l_lksb, 3051ccd979bdSMark Fasheh dlm_flags, 3052ccd979bdSMark Fasheh lockres->l_name, 3053f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 305424ef1815SJoel Becker lockres); 3055de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 30567431cd7eSJoel Becker if (ret) { 30577431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3058ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3059ccd979bdSMark Fasheh goto bail; 3060ccd979bdSMark Fasheh } 3061ccd979bdSMark Fasheh 3062ccd979bdSMark Fasheh ret = 0; 3063ccd979bdSMark Fasheh bail: 3064ccd979bdSMark Fasheh 
mlog_exit(ret); 3065ccd979bdSMark Fasheh return ret; 3066ccd979bdSMark Fasheh } 3067ccd979bdSMark Fasheh 306824ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3069ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3070ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3071ccd979bdSMark Fasheh { 3072ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3073ccd979bdSMark Fasheh 3074ccd979bdSMark Fasheh mlog_entry_void(); 3075ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3076ccd979bdSMark Fasheh 3077ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3078ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3079ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 3080ccd979bdSMark Fasheh * requeue this lock. */ 3081ccd979bdSMark Fasheh 3082ccd979bdSMark Fasheh mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3083ccd979bdSMark Fasheh return 0; 3084ccd979bdSMark Fasheh } 3085ccd979bdSMark Fasheh 3086ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 3087ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3088ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3089ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3090ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. 
*/ 3091ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3092ccd979bdSMark Fasheh 3093ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3094ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3095ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3096ccd979bdSMark Fasheh 3097ccd979bdSMark Fasheh return 1; 3098ccd979bdSMark Fasheh } 3099ccd979bdSMark Fasheh 3100ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3101ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3102ccd979bdSMark Fasheh { 3103ccd979bdSMark Fasheh int ret; 3104ccd979bdSMark Fasheh 3105ccd979bdSMark Fasheh mlog_entry_void(); 3106ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3107ccd979bdSMark Fasheh 31084670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 31097431cd7eSJoel Becker DLM_LKF_CANCEL, lockres); 31107431cd7eSJoel Becker if (ret) { 31117431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3112ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3113ccd979bdSMark Fasheh } 3114ccd979bdSMark Fasheh 311524ef1815SJoel Becker mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3116ccd979bdSMark Fasheh 3117ccd979bdSMark Fasheh mlog_exit(ret); 3118ccd979bdSMark Fasheh return ret; 3119ccd979bdSMark Fasheh } 3120ccd979bdSMark Fasheh 3121b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3122ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3123cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3124ccd979bdSMark Fasheh { 3125ccd979bdSMark Fasheh unsigned long flags; 3126ccd979bdSMark Fasheh int blocking; 3127ccd979bdSMark Fasheh int new_level; 3128ccd979bdSMark Fasheh int ret = 0; 31295ef0d4eaSMark Fasheh int set_lvb = 0; 3130de551246SJoel Becker unsigned int gen; 3131ccd979bdSMark Fasheh 3132ccd979bdSMark Fasheh mlog_entry_void(); 3133ccd979bdSMark Fasheh 3134ccd979bdSMark Fasheh 
spin_lock_irqsave(&lockres->l_lock, flags); 3135ccd979bdSMark Fasheh 3136ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3137ccd979bdSMark Fasheh 3138ccd979bdSMark Fasheh recheck: 3139ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3140de551246SJoel Becker /* XXX 3141de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3142de551246SJoel Becker * exists entirely for one reason - another thread has set 3143de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3144de551246SJoel Becker * 3145de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3146de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3147de551246SJoel Becker * get no ast, and we will have no way of knowing the 3148de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3149de551246SJoel Becker * into dlm_lock() and wait...forever. 3150de551246SJoel Becker * 3151de551246SJoel Becker * Why forever? Because another node has asked for the 3152de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3153de551246SJoel Becker * 3154de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3155de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3156de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3157de551246SJoel Becker * we then cancel their request. 3158de551246SJoel Becker * 3159de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3160de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3161de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 
3162de551246SJoel Becker */ 3163de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_PENDING) 3164de551246SJoel Becker goto leave_requeue; 3165de551246SJoel Becker 3166d680efe9SMark Fasheh ctl->requeue = 1; 3167ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3168ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3169ccd979bdSMark Fasheh if (ret) { 3170ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3171ccd979bdSMark Fasheh if (ret < 0) 3172ccd979bdSMark Fasheh mlog_errno(ret); 3173ccd979bdSMark Fasheh } 3174ccd979bdSMark Fasheh goto leave; 3175ccd979bdSMark Fasheh } 3176ccd979bdSMark Fasheh 3177ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3178ccd979bdSMark Fasheh * then requeue. */ 3179bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 3180f7fbfdd1SMark Fasheh && (lockres->l_ex_holders || lockres->l_ro_holders)) 3181f7fbfdd1SMark Fasheh goto leave_requeue; 3182ccd979bdSMark Fasheh 3183ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3184ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3185bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 3186f7fbfdd1SMark Fasheh lockres->l_ex_holders) 3187f7fbfdd1SMark Fasheh goto leave_requeue; 3188f7fbfdd1SMark Fasheh 3189f7fbfdd1SMark Fasheh /* 3190f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3191f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3192f7fbfdd1SMark Fasheh */ 3193f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3194f7fbfdd1SMark Fasheh && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3195f7fbfdd1SMark Fasheh goto leave_requeue; 3196ccd979bdSMark Fasheh 319716d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 319816d5b956SMark Fasheh 319916d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 320016d5b956SMark Fasheh && !lockres->l_ops->check_downconvert(lockres, new_level)) 320116d5b956SMark Fasheh goto leave_requeue; 320216d5b956SMark Fasheh 3203ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3204ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3205ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3206cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3207ccd979bdSMark Fasheh goto downconvert; 3208ccd979bdSMark Fasheh 3209ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3210ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3211ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3212ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. */ 3213ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3214ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3215ccd979bdSMark Fasheh 3216cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3217d680efe9SMark Fasheh 3218d680efe9SMark Fasheh if (ctl->unblock_action == UNBLOCK_STOP_POST) 3219d680efe9SMark Fasheh goto leave; 3220ccd979bdSMark Fasheh 3221ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3222ccd979bdSMark Fasheh if (blocking != lockres->l_blocking) { 3223ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3224ccd979bdSMark Fasheh * it just yet. 
*/ 3225ccd979bdSMark Fasheh goto recheck; 3226ccd979bdSMark Fasheh } 3227ccd979bdSMark Fasheh 3228ccd979bdSMark Fasheh downconvert: 3229d680efe9SMark Fasheh ctl->requeue = 0; 3230ccd979bdSMark Fasheh 32315ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3232bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 32335ef0d4eaSMark Fasheh set_lvb = 1; 32345ef0d4eaSMark Fasheh 32355ef0d4eaSMark Fasheh /* 32365ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 32375ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 32385ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 32395ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 32405ef0d4eaSMark Fasheh */ 32415ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 32425ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 32435ef0d4eaSMark Fasheh } 32445ef0d4eaSMark Fasheh 3245de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3246ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3247de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3248de551246SJoel Becker gen); 3249de551246SJoel Becker 3250ccd979bdSMark Fasheh leave: 3251ccd979bdSMark Fasheh mlog_exit(ret); 3252ccd979bdSMark Fasheh return ret; 3253f7fbfdd1SMark Fasheh 3254f7fbfdd1SMark Fasheh leave_requeue: 3255f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3256f7fbfdd1SMark Fasheh ctl->requeue = 1; 3257f7fbfdd1SMark Fasheh 3258f7fbfdd1SMark Fasheh mlog_exit(0); 3259f7fbfdd1SMark Fasheh return 0; 3260ccd979bdSMark Fasheh } 3261ccd979bdSMark Fasheh 3262d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3263ccd979bdSMark Fasheh int blocking) 3264ccd979bdSMark Fasheh { 3265ccd979bdSMark Fasheh struct inode *inode; 3266ccd979bdSMark Fasheh struct address_space *mapping; 3267ccd979bdSMark Fasheh 3268ccd979bdSMark 
Fasheh inode = ocfs2_lock_res_inode(lockres); 3269ccd979bdSMark Fasheh mapping = inode->i_mapping; 3270ccd979bdSMark Fasheh 32711044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3272f1f54068SMark Fasheh goto out; 3273f1f54068SMark Fasheh 32747f4a2a97SMark Fasheh /* 32757f4a2a97SMark Fasheh * We need this before the filemap_fdatawrite() so that it can 32767f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 32777f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 32787f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 32797f4a2a97SMark Fasheh * them up again. 32807f4a2a97SMark Fasheh */ 32817f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 32827f4a2a97SMark Fasheh 3283ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3284b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3285b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3286ccd979bdSMark Fasheh } 3287ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3288bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3289ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3290ccd979bdSMark Fasheh } else { 3291ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3292ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3293ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3294ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3295ccd979bdSMark Fasheh * them around in that case. 
*/ 3296ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3297ccd979bdSMark Fasheh } 3298ccd979bdSMark Fasheh 3299f1f54068SMark Fasheh out: 3300d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3301ccd979bdSMark Fasheh } 3302ccd979bdSMark Fasheh 3303810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3304810d5aebSMark Fasheh int new_level) 3305810d5aebSMark Fasheh { 3306810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3307810d5aebSMark Fasheh int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3308810d5aebSMark Fasheh 3309bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3310bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3311810d5aebSMark Fasheh 3312810d5aebSMark Fasheh if (checkpointed) 3313810d5aebSMark Fasheh return 1; 3314810d5aebSMark Fasheh 3315810d5aebSMark Fasheh ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3316810d5aebSMark Fasheh return 0; 3317810d5aebSMark Fasheh } 3318810d5aebSMark Fasheh 3319810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3320810d5aebSMark Fasheh { 3321810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3322810d5aebSMark Fasheh 3323810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3324810d5aebSMark Fasheh } 3325810d5aebSMark Fasheh 3326d680efe9SMark Fasheh /* 3327d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. Right now this 332834d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3329d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 
3330d680efe9SMark Fasheh */ 3331d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3332d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3333d680efe9SMark Fasheh { 3334d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3335d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3336d680efe9SMark Fasheh } 3337d680efe9SMark Fasheh 3338d680efe9SMark Fasheh /* 3339d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3340d680efe9SMark Fasheh * 3341d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3342d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3343d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3344d680efe9SMark Fasheh * 3345d680efe9SMark Fasheh * We have two potential problems 3346d680efe9SMark Fasheh * 3347d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3348d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3349d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3350d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3351d680efe9SMark Fasheh * unblock processing. 3352d680efe9SMark Fasheh * 3353d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3354d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3355d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3356d680efe9SMark Fasheh */ 3357d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3358d680efe9SMark Fasheh int blocking) 3359d680efe9SMark Fasheh { 3360d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3361d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3362d680efe9SMark Fasheh struct dentry *dentry; 3363d680efe9SMark Fasheh unsigned long flags; 3364d680efe9SMark Fasheh int extra_ref = 0; 3365d680efe9SMark Fasheh 3366d680efe9SMark Fasheh /* 3367d680efe9SMark Fasheh * This node is blocking another node from getting a read 3368d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3369d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3370d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3371d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3372d680efe9SMark Fasheh * so there's no further work to do. 3373d680efe9SMark Fasheh */ 3374bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3375d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3376d680efe9SMark Fasheh 3377d680efe9SMark Fasheh /* 3378d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3379d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3380d680efe9SMark Fasheh * needs to be freed or not. 3381d680efe9SMark Fasheh */ 3382d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3383d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3384d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3385d680efe9SMark Fasheh 3386d680efe9SMark Fasheh /* 3387d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3388d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3389d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3390d680efe9SMark Fasheh * flag. 
3391d680efe9SMark Fasheh */ 3392d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3393d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3394d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3395d680efe9SMark Fasheh && dl->dl_count) { 3396d680efe9SMark Fasheh dl->dl_count++; 3397d680efe9SMark Fasheh extra_ref = 1; 3398d680efe9SMark Fasheh } 3399d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3400d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3401d680efe9SMark Fasheh 3402d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3403d680efe9SMark Fasheh 3404d680efe9SMark Fasheh /* 3405d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3406d680efe9SMark Fasheh * which means we can't have any more outstanding 3407d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3408d680efe9SMark Fasheh */ 3409d680efe9SMark Fasheh if (!extra_ref) 3410d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3411d680efe9SMark Fasheh 3412d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3413d680efe9SMark Fasheh while (1) { 3414d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3415d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3416d680efe9SMark Fasheh if (!dentry) 3417d680efe9SMark Fasheh break; 3418d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3419d680efe9SMark Fasheh 3420d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3421d680efe9SMark Fasheh dentry->d_name.name); 3422d680efe9SMark Fasheh 3423d680efe9SMark Fasheh /* 3424d680efe9SMark Fasheh * The following dcache calls may do an 3425d680efe9SMark Fasheh * iput(). Normally we don't want that from the 3426d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3427d680efe9SMark Fasheh * because the requesting node already has an 3428d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3429d680efe9SMark Fasheh * for a downconvert. 
3430d680efe9SMark Fasheh */ 3431d680efe9SMark Fasheh d_delete(dentry); 3432d680efe9SMark Fasheh dput(dentry); 3433d680efe9SMark Fasheh 3434d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3435d680efe9SMark Fasheh } 3436d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3437d680efe9SMark Fasheh 3438d680efe9SMark Fasheh /* 3439d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3440d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3441d680efe9SMark Fasheh */ 3442d680efe9SMark Fasheh if (dl->dl_count == 1) 3443d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3444d680efe9SMark Fasheh 3445d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3446d680efe9SMark Fasheh } 3447d680efe9SMark Fasheh 34484670c46dSJoel Becker /* 34494670c46dSJoel Becker * This is the filesystem locking protocol. It provides the lock handling 34504670c46dSJoel Becker * hooks for the underlying DLM. It has a maximum version number. 34514670c46dSJoel Becker * The version number allows interoperability with systems running at 34524670c46dSJoel Becker * the same major number and an equal or smaller minor number. 34534670c46dSJoel Becker * 34544670c46dSJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 34554670c46dSJoel Becker * lock, orders them differently, does different things underneath a lock), 34564670c46dSJoel Becker * the version must be changed. The protocol is negotiated when joining 34574670c46dSJoel Becker * the dlm domain. A node may join the domain if its major version is 34584670c46dSJoel Becker * identical to all other nodes and its minor version is greater than 34594670c46dSJoel Becker * or equal to all other nodes. When its minor version is greater than 34604670c46dSJoel Becker * the other nodes, it will run at the minor version specified by the 34614670c46dSJoel Becker * other nodes. 
34624670c46dSJoel Becker * 34634670c46dSJoel Becker * If a locking change is made that will not be compatible with older 34644670c46dSJoel Becker * versions, the major number must be increased and the minor version set 34654670c46dSJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 34664670c46dSJoel Becker * speaking to older versions, the minor version must be increased. If a 34674670c46dSJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 34684670c46dSJoel Becker * are just ignored by older versions), the version does not need to be 34694670c46dSJoel Becker * updated. 34704670c46dSJoel Becker */ 347124ef1815SJoel Becker static struct ocfs2_locking_protocol lproto = { 34724670c46dSJoel Becker .lp_max_version = { 34734670c46dSJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 34744670c46dSJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 34754670c46dSJoel Becker }, 347624ef1815SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 347724ef1815SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 347824ef1815SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 347924ef1815SJoel Becker }; 348024ef1815SJoel Becker 348163e0c48aSJoel Becker void ocfs2_set_locking_protocol(void) 348224ef1815SJoel Becker { 348363e0c48aSJoel Becker ocfs2_stack_glue_set_locking_protocol(&lproto); 348424ef1815SJoel Becker } 348524ef1815SJoel Becker 348624ef1815SJoel Becker 348700600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3488ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3489ccd979bdSMark Fasheh { 3490ccd979bdSMark Fasheh int status; 3491d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3492ccd979bdSMark Fasheh unsigned long flags; 3493ccd979bdSMark Fasheh 3494ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3495ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3496ccd979bdSMark Fasheh * flag. 
*/ 3497ccd979bdSMark Fasheh 3498ccd979bdSMark Fasheh mlog_entry_void(); 3499ccd979bdSMark Fasheh 3500ccd979bdSMark Fasheh BUG_ON(!lockres); 3501ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3502ccd979bdSMark Fasheh 3503ccd979bdSMark Fasheh mlog(0, "lockres %s blocked.\n", lockres->l_name); 3504ccd979bdSMark Fasheh 3505ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 350634d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 3507ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3508ccd979bdSMark Fasheh * but short circuiting here will still save us some 3509ccd979bdSMark Fasheh * performance. */ 3510ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3511ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3512ccd979bdSMark Fasheh goto unqueue; 3513ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3514ccd979bdSMark Fasheh 3515b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3516ccd979bdSMark Fasheh if (status < 0) 3517ccd979bdSMark Fasheh mlog_errno(status); 3518ccd979bdSMark Fasheh 3519ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3520ccd979bdSMark Fasheh unqueue: 3521d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3522ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3523ccd979bdSMark Fasheh } else 3524ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3525ccd979bdSMark Fasheh 3526ccd979bdSMark Fasheh mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3527d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3528ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3529ccd979bdSMark Fasheh 3530d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3531d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3532d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3533d680efe9SMark Fasheh 3534ccd979bdSMark Fasheh mlog_exit_void(); 3535ccd979bdSMark Fasheh } 3536ccd979bdSMark Fasheh 3537ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3538ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3539ccd979bdSMark Fasheh { 3540ccd979bdSMark Fasheh mlog_entry_void(); 3541ccd979bdSMark Fasheh 3542ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3543ccd979bdSMark Fasheh 3544ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3545ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 3546ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 3547ccd979bdSMark Fasheh * to the resource will get it soon. 
*/ 3548ccd979bdSMark Fasheh mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 3549ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3550ccd979bdSMark Fasheh return; 3551ccd979bdSMark Fasheh } 3552ccd979bdSMark Fasheh 3553ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3554ccd979bdSMark Fasheh 355534d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 3556ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 3557ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 3558ccd979bdSMark Fasheh &osb->blocked_lock_list); 3559ccd979bdSMark Fasheh osb->blocked_lock_count++; 3560ccd979bdSMark Fasheh } 356134d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 3562ccd979bdSMark Fasheh 3563ccd979bdSMark Fasheh mlog_exit_void(); 3564ccd979bdSMark Fasheh } 356534d024f8SMark Fasheh 356634d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 356734d024f8SMark Fasheh { 356834d024f8SMark Fasheh unsigned long processed; 356934d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 357034d024f8SMark Fasheh 357134d024f8SMark Fasheh mlog_entry_void(); 357234d024f8SMark Fasheh 357334d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 357434d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 357534d024f8SMark Fasheh * wake happens part-way through our work */ 357634d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 357734d024f8SMark Fasheh 357834d024f8SMark Fasheh processed = osb->blocked_lock_count; 357934d024f8SMark Fasheh while (processed) { 358034d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 358134d024f8SMark Fasheh 358234d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 358334d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 358434d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 358534d024f8SMark Fasheh osb->blocked_lock_count--; 358634d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 358734d024f8SMark 
Fasheh 358834d024f8SMark Fasheh BUG_ON(!processed); 358934d024f8SMark Fasheh processed--; 359034d024f8SMark Fasheh 359134d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 359234d024f8SMark Fasheh 359334d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 359434d024f8SMark Fasheh } 359534d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 359634d024f8SMark Fasheh 359734d024f8SMark Fasheh mlog_exit_void(); 359834d024f8SMark Fasheh } 359934d024f8SMark Fasheh 360034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 360134d024f8SMark Fasheh { 360234d024f8SMark Fasheh int empty = 0; 360334d024f8SMark Fasheh 360434d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 360534d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 360634d024f8SMark Fasheh empty = 1; 360734d024f8SMark Fasheh 360834d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 360934d024f8SMark Fasheh return empty; 361034d024f8SMark Fasheh } 361134d024f8SMark Fasheh 361234d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 361334d024f8SMark Fasheh { 361434d024f8SMark Fasheh int should_wake = 0; 361534d024f8SMark Fasheh 361634d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 361734d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 361834d024f8SMark Fasheh should_wake = 1; 361934d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 362034d024f8SMark Fasheh 362134d024f8SMark Fasheh return should_wake; 362234d024f8SMark Fasheh } 362334d024f8SMark Fasheh 3624200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 362534d024f8SMark Fasheh { 362634d024f8SMark Fasheh int status = 0; 362734d024f8SMark Fasheh struct ocfs2_super *osb = arg; 362834d024f8SMark Fasheh 362934d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 363034d024f8SMark Fasheh * work available */ 363134d024f8SMark Fasheh while (!(kthread_should_stop() && 363234d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(osb))) { 363334d024f8SMark Fasheh 363434d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 363534d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 363634d024f8SMark Fasheh kthread_should_stop()); 363734d024f8SMark Fasheh 363834d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 363934d024f8SMark Fasheh 364034d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 364134d024f8SMark Fasheh } 364234d024f8SMark Fasheh 364334d024f8SMark Fasheh osb->dc_task = NULL; 364434d024f8SMark Fasheh return status; 364534d024f8SMark Fasheh } 364634d024f8SMark Fasheh 364734d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 364834d024f8SMark Fasheh { 364934d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 365034d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 365134d024f8SMark Fasheh * the caller may have made to the voting state */ 365234d024f8SMark Fasheh osb->dc_wake_sequence++; 365334d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 365434d024f8SMark Fasheh wake_up(&osb->dc_event); 365534d024f8SMark Fasheh } 3656