/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 359e33d69fSJan Kara #include <linux/quotaops.h> 36174cd4b1SIngo Molnar #include <linux/sched/signal.h> 37ccd979bdSMark Fasheh 38ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 39ccd979bdSMark Fasheh #include <cluster/masklog.h> 40ccd979bdSMark Fasheh 41ccd979bdSMark Fasheh #include "ocfs2.h" 42d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 43ccd979bdSMark Fasheh 44ccd979bdSMark Fasheh #include "alloc.h" 45d680efe9SMark Fasheh #include "dcache.h" 46ccd979bdSMark Fasheh #include "dlmglue.h" 47ccd979bdSMark Fasheh #include "extent_map.h" 487f1a37e3STiger Yang #include "file.h" 49ccd979bdSMark Fasheh #include "heartbeat.h" 50ccd979bdSMark Fasheh #include "inode.h" 51ccd979bdSMark Fasheh #include "journal.h" 5224ef1815SJoel Becker #include "stackglue.h" 53ccd979bdSMark Fasheh #include "slot_map.h" 54ccd979bdSMark Fasheh #include "super.h" 55ccd979bdSMark Fasheh #include "uptodate.h" 569e33d69fSJan Kara #include "quota.h" 578dec98edSTao Ma #include "refcounttree.h" 58b8a7a3a6SAndreas Gruenbacher #include "acl.h" 59ccd979bdSMark Fasheh 60ccd979bdSMark Fasheh #include "buffer_head_io.h" 61ccd979bdSMark Fasheh 62ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 63ccd979bdSMark Fasheh struct list_head mw_item; 64ccd979bdSMark Fasheh int mw_status; 65ccd979bdSMark Fasheh struct completion mw_complete; 66ccd979bdSMark Fasheh unsigned long mw_mask; 67ccd979bdSMark Fasheh unsigned long mw_goal; 688ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 695bc970e8SSunil Mushran ktime_t 
mw_lock_start; 708ddb7b00SSunil Mushran #endif 71ccd979bdSMark Fasheh }; 72ccd979bdSMark Fasheh 7354a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 75cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 769e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 77ccd979bdSMark Fasheh 78d680efe9SMark Fasheh /* 79cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 80d680efe9SMark Fasheh * 81b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 82d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 83d680efe9SMark Fasheh * 84d680efe9SMark Fasheh */ 85d680efe9SMark Fasheh enum ocfs2_unblock_action { 86d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 87d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 88d680efe9SMark Fasheh * ->post_unlock callback */ 89d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 90d680efe9SMark Fasheh * ->post_unlock() callback. 
*/ 91d680efe9SMark Fasheh }; 92d680efe9SMark Fasheh 93d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 94d680efe9SMark Fasheh int requeue; 95d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 96d680efe9SMark Fasheh }; 97d680efe9SMark Fasheh 98cb25797dSJan Kara /* Lockdep class keys */ 99cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; 100cb25797dSJan Kara 101810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 102810d5aebSMark Fasheh int new_level); 103810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 104810d5aebSMark Fasheh 105cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 106cc567d89SMark Fasheh int blocking); 107cc567d89SMark Fasheh 108cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 109cc567d89SMark Fasheh int blocking); 110d680efe9SMark Fasheh 111d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 112d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 113ccd979bdSMark Fasheh 1149e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 1156cb129f5SAdrian Bunk 1168dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 1178dec98edSTao Ma int new_level); 1188dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 1198dec98edSTao Ma int blocking); 1208dec98edSTao Ma 1216cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1226cb129f5SAdrian Bunk 1236cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1246cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1256cb129f5SAdrian Bunk const char *function, 1266cb129f5SAdrian Bunk unsigned int line, 1276cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1286cb129f5SAdrian Bunk { 129a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1306cb129f5SAdrian Bunk 1316cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1326cb129f5SAdrian Bunk lockres->l_name, function, line); 1336cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1346cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1356cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1366cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1376cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1386cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1396cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1406cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1416cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1426cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1436cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1446cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1456cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1466cb129f5SAdrian Bunk } 1476cb129f5SAdrian Bunk 1486cb129f5SAdrian Bunk 149f625c979SMark Fasheh /* 150f625c979SMark Fasheh * OCFS2 Lock Resource Operations 151f625c979SMark Fasheh * 152f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 1530d5dc6c2SMark Fasheh * 1540d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1550d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 
1560d5dc6c2SMark Fasheh * 1570d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1580d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1590d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1600d5dc6c2SMark Fasheh * destruction time). 161f625c979SMark Fasheh */ 162ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 16354a7e755SMark Fasheh /* 16454a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 16554a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 16654a7e755SMark Fasheh */ 16754a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 168b5e500e2SMark Fasheh 1690d5dc6c2SMark Fasheh /* 17034d024f8SMark Fasheh * Optionally called in the downconvert thread after a 17134d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 17234d024f8SMark Fasheh * after this callback is called, so it is safe to free 17334d024f8SMark Fasheh * memory, etc. 1740d5dc6c2SMark Fasheh * 1750d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1760d5dc6c2SMark Fasheh * by ->downconvert_worker() 1770d5dc6c2SMark Fasheh */ 178d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 179f625c979SMark Fasheh 180f625c979SMark Fasheh /* 18116d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 18216d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 18316d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 18416d5b956SMark Fasheh * 18516d5b956SMark Fasheh * For most locks, the default checks that there are no 18616d5b956SMark Fasheh * incompatible holders are sufficient. 18716d5b956SMark Fasheh * 18816d5b956SMark Fasheh * Called with the lockres spinlock held. 
18916d5b956SMark Fasheh */ 19016d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 19116d5b956SMark Fasheh 19216d5b956SMark Fasheh /* 1935ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1945ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1955ef0d4eaSMark Fasheh * 1965ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1975ef0d4eaSMark Fasheh * in the flags field. 1985ef0d4eaSMark Fasheh * 1995ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 2005ef0d4eaSMark Fasheh */ 2015ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 2025ef0d4eaSMark Fasheh 2035ef0d4eaSMark Fasheh /* 204cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 205cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 206cc567d89SMark Fasheh * any locks held so the function can do work that might 207cc567d89SMark Fasheh * schedule (syncing out data, etc). 208cc567d89SMark Fasheh * 209cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 210cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 211cc567d89SMark Fasheh */ 212cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 213cc567d89SMark Fasheh 214cc567d89SMark Fasheh /* 215f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 216f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 217f625c979SMark Fasheh */ 218f625c979SMark Fasheh int flags; 219ccd979bdSMark Fasheh }; 220ccd979bdSMark Fasheh 221f625c979SMark Fasheh /* 222f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 223f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. 
If this 224f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 225f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 226f625c979SMark Fasheh * expected that the locking wrapper will clear the 227f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 228f625c979SMark Fasheh */ 229f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 230f625c979SMark Fasheh 231b80fc012SMark Fasheh /* 2325ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2335ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 234b80fc012SMark Fasheh */ 235b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 236b80fc012SMark Fasheh 237ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 23854a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 239f625c979SMark Fasheh .flags = 0, 240ccd979bdSMark Fasheh }; 241ccd979bdSMark Fasheh 242e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 24354a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 244810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 245810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 246f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 247b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 248ccd979bdSMark Fasheh }; 249ccd979bdSMark Fasheh 250ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 251f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 252ccd979bdSMark Fasheh }; 253ccd979bdSMark Fasheh 254ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 255f625c979SMark Fasheh .flags = 0, 256ccd979bdSMark Fasheh }; 257ccd979bdSMark Fasheh 2586ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 2596ca497a8Swengang wang .flags = 0, 2606ca497a8Swengang wang }; 2616ca497a8Swengang wang 26283273932SSrinivas Eeda static 
struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 26383273932SSrinivas Eeda .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 26483273932SSrinivas Eeda }; 26583273932SSrinivas Eeda 266d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 26754a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 268d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 269cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 270f625c979SMark Fasheh .flags = 0, 271d680efe9SMark Fasheh }; 272d680efe9SMark Fasheh 27350008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 27450008630STiger Yang .get_osb = ocfs2_get_inode_osb, 27550008630STiger Yang .flags = 0, 27650008630STiger Yang }; 27750008630STiger Yang 278cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 279cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 280cf8e06f1SMark Fasheh .flags = 0, 281cf8e06f1SMark Fasheh }; 282cf8e06f1SMark Fasheh 2839e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2849e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2859e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2869e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2879e33d69fSJan Kara }; 2889e33d69fSJan Kara 2898dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 2908dec98edSTao Ma .check_downconvert = ocfs2_check_refcount_downconvert, 2918dec98edSTao Ma .downconvert_worker = ocfs2_refcount_convert_worker, 2928dec98edSTao Ma .flags = 0, 2938dec98edSTao Ma }; 2948dec98edSTao Ma 295ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 296ccd979bdSMark Fasheh { 297ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 29850008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 29950008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 300ccd979bdSMark Fasheh } 301ccd979bdSMark Fasheh 302c0e41338SJoel Becker 
static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 303a796d286SJoel Becker { 304a796d286SJoel Becker return container_of(lksb, struct ocfs2_lock_res, l_lksb); 305a796d286SJoel Becker } 306a796d286SJoel Becker 307ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 308ccd979bdSMark Fasheh { 309ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 310ccd979bdSMark Fasheh 311ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 312ccd979bdSMark Fasheh } 313ccd979bdSMark Fasheh 314d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 315d680efe9SMark Fasheh { 316d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 317d680efe9SMark Fasheh 318d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 319d680efe9SMark Fasheh } 320d680efe9SMark Fasheh 3219e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 3229e33d69fSJan Kara { 3239e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 3249e33d69fSJan Kara 3259e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 3269e33d69fSJan Kara } 3279e33d69fSJan Kara 3288dec98edSTao Ma static inline struct ocfs2_refcount_tree * 3298dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 3308dec98edSTao Ma { 3318dec98edSTao Ma return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 3328dec98edSTao Ma } 3338dec98edSTao Ma 33454a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 33554a7e755SMark Fasheh { 33654a7e755SMark Fasheh if (lockres->l_ops->get_osb) 33754a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 33854a7e755SMark Fasheh 33954a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 34054a7e755SMark Fasheh } 34154a7e755SMark Fasheh 342ccd979bdSMark Fasheh 
static int ocfs2_lock_create(struct ocfs2_super *osb, 343ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 344ccd979bdSMark Fasheh int level, 345bd3e7610SJoel Becker u32 dlm_flags); 346ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 347ccd979bdSMark Fasheh int wanted); 348cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 349ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 350cb25797dSJan Kara int level, unsigned long caller_ip); 351cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 352cb25797dSJan Kara struct ocfs2_lock_res *lockres, 353cb25797dSJan Kara int level) 354cb25797dSJan Kara { 355cb25797dSJan Kara __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 356cb25797dSJan Kara } 357cb25797dSJan Kara 358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 359ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 360ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 361ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 362ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 363ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 364ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 365ccd979bdSMark Fasheh int convert); 3667431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 367c74ff8bbSSunil Mushran if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 3687431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3697431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 370c74ff8bbSSunil Mushran else \ 371c74ff8bbSSunil Mushran mlog(ML_ERROR, "DLM error %d while calling %s on resource 
%.*s%08x\n", \ 372c74ff8bbSSunil Mushran _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 373c74ff8bbSSunil Mushran (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 374ccd979bdSMark Fasheh } while (0) 37534d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 37634d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 377ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 378e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 379ccd979bdSMark Fasheh struct buffer_head **bh); 380ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 381ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 382de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 383cf8e06f1SMark Fasheh int new_level); 384cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 385cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 386cf8e06f1SMark Fasheh int new_level, 387de551246SJoel Becker int lvb, 388de551246SJoel Becker unsigned int generation); 389cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 390cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 391cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 392cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 393cf8e06f1SMark Fasheh 394ccd979bdSMark Fasheh 395ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 396ccd979bdSMark Fasheh u64 blkno, 397ccd979bdSMark Fasheh u32 generation, 398ccd979bdSMark Fasheh char *name) 399ccd979bdSMark Fasheh { 400ccd979bdSMark Fasheh int len; 401ccd979bdSMark Fasheh 402ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 403ccd979bdSMark Fasheh 404b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 405b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 
406b0697053SMark Fasheh (long long)blkno, generation); 407ccd979bdSMark Fasheh 408ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 409ccd979bdSMark Fasheh 410ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 411ccd979bdSMark Fasheh } 412ccd979bdSMark Fasheh 41334af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 414ccd979bdSMark Fasheh 415ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 416ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 417ccd979bdSMark Fasheh { 418ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 419ccd979bdSMark Fasheh 420ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 421ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 422ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 423ccd979bdSMark Fasheh } 424ccd979bdSMark Fasheh 425ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 426ccd979bdSMark Fasheh { 427ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 428ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 429ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 430ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 431ccd979bdSMark Fasheh } 432ccd979bdSMark Fasheh 4338ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 4348ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4358ddb7b00SSunil Mushran { 4368ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4375bc970e8SSunil Mushran memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); 4385bc970e8SSunil Mushran memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); 4398ddb7b00SSunil Mushran } 4408ddb7b00SSunil Mushran 4418ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4428ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4438ddb7b00SSunil Mushran 
{ 4445bc970e8SSunil Mushran u32 usec; 4455bc970e8SSunil Mushran ktime_t kt; 4465bc970e8SSunil Mushran struct ocfs2_lock_stats *stats; 4478ddb7b00SSunil Mushran 4485bc970e8SSunil Mushran if (level == LKM_PRMODE) 4495bc970e8SSunil Mushran stats = &res->l_lock_prmode; 4505bc970e8SSunil Mushran else if (level == LKM_EXMODE) 4515bc970e8SSunil Mushran stats = &res->l_lock_exmode; 4525bc970e8SSunil Mushran else 4538ddb7b00SSunil Mushran return; 4548ddb7b00SSunil Mushran 4555bc970e8SSunil Mushran kt = ktime_sub(ktime_get(), mw->mw_lock_start); 4565bc970e8SSunil Mushran usec = ktime_to_us(kt); 4575bc970e8SSunil Mushran 4585bc970e8SSunil Mushran stats->ls_gets++; 4595bc970e8SSunil Mushran stats->ls_total += ktime_to_ns(kt); 4605bc970e8SSunil Mushran /* overflow */ 46116865b7cSroel if (unlikely(stats->ls_gets == 0)) { 4625bc970e8SSunil Mushran stats->ls_gets++; 4635bc970e8SSunil Mushran stats->ls_total = ktime_to_ns(kt); 4645bc970e8SSunil Mushran } 4655bc970e8SSunil Mushran 4665bc970e8SSunil Mushran if (stats->ls_max < usec) 4675bc970e8SSunil Mushran stats->ls_max = usec; 4685bc970e8SSunil Mushran 4698ddb7b00SSunil Mushran if (ret) 4705bc970e8SSunil Mushran stats->ls_fail++; 4718ddb7b00SSunil Mushran } 4728ddb7b00SSunil Mushran 4738ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4748ddb7b00SSunil Mushran { 4758ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4768ddb7b00SSunil Mushran } 4778ddb7b00SSunil Mushran 4788ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4798ddb7b00SSunil Mushran { 4805bc970e8SSunil Mushran mw->mw_lock_start = ktime_get(); 4818ddb7b00SSunil Mushran } 4828ddb7b00SSunil Mushran #else 4838ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4848ddb7b00SSunil Mushran { 4858ddb7b00SSunil Mushran } 4868ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4878ddb7b00SSunil Mushran int 
level, struct ocfs2_mask_waiter *mw, int ret) 4888ddb7b00SSunil Mushran { 4898ddb7b00SSunil Mushran } 4908ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4918ddb7b00SSunil Mushran { 4928ddb7b00SSunil Mushran } 4938ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4948ddb7b00SSunil Mushran { 4958ddb7b00SSunil Mushran } 4968ddb7b00SSunil Mushran #endif 4978ddb7b00SSunil Mushran 498ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 499ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 500ccd979bdSMark Fasheh enum ocfs2_lock_type type, 501ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 502ccd979bdSMark Fasheh void *priv) 503ccd979bdSMark Fasheh { 504ccd979bdSMark Fasheh res->l_type = type; 505ccd979bdSMark Fasheh res->l_ops = ops; 506ccd979bdSMark Fasheh res->l_priv = priv; 507ccd979bdSMark Fasheh 508bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 509bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 510bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 511ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 512ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 513ccd979bdSMark Fasheh 514ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 515ccd979bdSMark Fasheh 516ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 5178ddb7b00SSunil Mushran 5188ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 519cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 520cb25797dSJan Kara if (type != OCFS2_LOCK_TYPE_OPEN) 521cb25797dSJan Kara lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 522cb25797dSJan Kara &lockdep_keys[type], 0); 523cb25797dSJan Kara else 524cb25797dSJan Kara res->l_lockdep_map.key = NULL; 525cb25797dSJan Kara #endif 526ccd979bdSMark Fasheh } 527ccd979bdSMark Fasheh 528ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 529ccd979bdSMark Fasheh 
{ 530ccd979bdSMark Fasheh /* This also clears out the lock status block */ 531ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 532ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 533ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 534ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 535ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 536439a36b8SEric Ren INIT_LIST_HEAD(&res->l_holders); 537ccd979bdSMark Fasheh } 538ccd979bdSMark Fasheh 539ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 540ccd979bdSMark Fasheh enum ocfs2_lock_type type, 54124c19ef4SMark Fasheh unsigned int generation, 542ccd979bdSMark Fasheh struct inode *inode) 543ccd979bdSMark Fasheh { 544ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 545ccd979bdSMark Fasheh 546ccd979bdSMark Fasheh switch(type) { 547ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 548ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 549ccd979bdSMark Fasheh break; 550ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 551e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 552ccd979bdSMark Fasheh break; 55350008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 55450008630STiger Yang ops = &ocfs2_inode_open_lops; 55550008630STiger Yang break; 556ccd979bdSMark Fasheh default: 557ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 558ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 559ccd979bdSMark Fasheh break; 560ccd979bdSMark Fasheh }; 561ccd979bdSMark Fasheh 562d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 56324c19ef4SMark Fasheh generation, res->l_name); 564d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 565d680efe9SMark Fasheh } 566d680efe9SMark Fasheh 56754a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 56854a7e755SMark Fasheh { 56954a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 57054a7e755SMark 
Fasheh 57154a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 57254a7e755SMark Fasheh } 57354a7e755SMark Fasheh 5749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5759e33d69fSJan Kara { 5769e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5779e33d69fSJan Kara 5789e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5799e33d69fSJan Kara } 5809e33d69fSJan Kara 581cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 582cf8e06f1SMark Fasheh { 583cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 584cf8e06f1SMark Fasheh 585cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 586cf8e06f1SMark Fasheh } 587cf8e06f1SMark Fasheh 588d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 589d680efe9SMark Fasheh { 590d680efe9SMark Fasheh __be64 inode_blkno_be; 591d680efe9SMark Fasheh 592d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 593d680efe9SMark Fasheh sizeof(__be64)); 594d680efe9SMark Fasheh 595d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 596d680efe9SMark Fasheh } 597d680efe9SMark Fasheh 59854a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 59954a7e755SMark Fasheh { 60054a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 60154a7e755SMark Fasheh 60254a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 60354a7e755SMark Fasheh } 60454a7e755SMark Fasheh 605d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 606d680efe9SMark Fasheh u64 parent, struct inode *inode) 607d680efe9SMark Fasheh { 608d680efe9SMark Fasheh int len; 609d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 610d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 611d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 
612d680efe9SMark Fasheh 613d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 614d680efe9SMark Fasheh 615d680efe9SMark Fasheh /* 616d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 617d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 618d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 619d680efe9SMark Fasheh * want error prints to show something without garbling the 620d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 621d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 622d680efe9SMark Fasheh * binary lock names. The stringified names have been a 623d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 624d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 625d680efe9SMark Fasheh * 626d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 627d680efe9SMark Fasheh * name size stays the same though - the last part is all 628d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 629d680efe9SMark Fasheh */ 630d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 631d680efe9SMark Fasheh "%c%016llx", 632d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 633d680efe9SMark Fasheh (long long)parent); 634d680efe9SMark Fasheh 635d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 636d680efe9SMark Fasheh 637d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 638d680efe9SMark Fasheh sizeof(__be64)); 639d680efe9SMark Fasheh 640d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 641d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 642d680efe9SMark Fasheh dl); 643ccd979bdSMark Fasheh } 644ccd979bdSMark Fasheh 645ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct 
ocfs2_lock_res *res, 646ccd979bdSMark Fasheh struct ocfs2_super *osb) 647ccd979bdSMark Fasheh { 648ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 649ccd979bdSMark Fasheh * once on it manually. */ 650ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 651d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 652d680efe9SMark Fasheh 0, res->l_name); 653ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 654ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 655ccd979bdSMark Fasheh } 656ccd979bdSMark Fasheh 657ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 658ccd979bdSMark Fasheh struct ocfs2_super *osb) 659ccd979bdSMark Fasheh { 660ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 661ccd979bdSMark Fasheh * once on it manually. */ 662ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 663d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 664d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 665ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 666ccd979bdSMark Fasheh } 667ccd979bdSMark Fasheh 6686ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 6696ca497a8Swengang wang struct ocfs2_super *osb) 6706ca497a8Swengang wang { 6716ca497a8Swengang wang /* nfs_sync lockres doesn't come from a slab so we call init 6726ca497a8Swengang wang * once on it manually. 
*/ 6736ca497a8Swengang wang ocfs2_lock_res_init_once(res); 6746ca497a8Swengang wang ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 6756ca497a8Swengang wang ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 6766ca497a8Swengang wang &ocfs2_nfs_sync_lops, osb); 6776ca497a8Swengang wang } 6786ca497a8Swengang wang 67983273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 68083273932SSrinivas Eeda struct ocfs2_super *osb) 68183273932SSrinivas Eeda { 68283273932SSrinivas Eeda ocfs2_lock_res_init_once(res); 68383273932SSrinivas Eeda ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 68483273932SSrinivas Eeda ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 68583273932SSrinivas Eeda &ocfs2_orphan_scan_lops, osb); 68683273932SSrinivas Eeda } 68783273932SSrinivas Eeda 688cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 689cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 690cf8e06f1SMark Fasheh { 691cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 692cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 693cf8e06f1SMark Fasheh 694cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 695cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 696cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 697cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 698cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 699cf8e06f1SMark Fasheh fp); 700cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 701cf8e06f1SMark Fasheh } 702cf8e06f1SMark Fasheh 7039e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 7049e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 7059e33d69fSJan Kara { 7069e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 7079e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, 
info->dqi_gi.dqi_type, 7089e33d69fSJan Kara 0, lockres->l_name); 7099e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 7109e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 7119e33d69fSJan Kara info); 7129e33d69fSJan Kara } 7139e33d69fSJan Kara 7148dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 7158dec98edSTao Ma struct ocfs2_super *osb, u64 ref_blkno, 7168dec98edSTao Ma unsigned int generation) 7178dec98edSTao Ma { 7188dec98edSTao Ma ocfs2_lock_res_init_once(lockres); 7198dec98edSTao Ma ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 7208dec98edSTao Ma generation, lockres->l_name); 7218dec98edSTao Ma ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 7228dec98edSTao Ma &ocfs2_refcount_block_lops, osb); 7238dec98edSTao Ma } 7248dec98edSTao Ma 725ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 726ccd979bdSMark Fasheh { 727ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 728ccd979bdSMark Fasheh return; 729ccd979bdSMark Fasheh 730ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 731ccd979bdSMark Fasheh 732ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 733ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 734ccd979bdSMark Fasheh res->l_name); 735ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 736ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 737ccd979bdSMark Fasheh res->l_name); 738ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 739ccd979bdSMark Fasheh "Lockres %s is locked\n", 740ccd979bdSMark Fasheh res->l_name); 741ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 742ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 743ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 744ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 745ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 746ccd979bdSMark Fasheh 
res->l_name, res->l_ex_holders); 747ccd979bdSMark Fasheh 748ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 749ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 750ccd979bdSMark Fasheh 751ccd979bdSMark Fasheh res->l_flags = 0UL; 752ccd979bdSMark Fasheh } 753ccd979bdSMark Fasheh 754439a36b8SEric Ren /* 755439a36b8SEric Ren * Keep a list of processes who have interest in a lockres. 756439a36b8SEric Ren * Note: this is now only uesed for check recursive cluster locking. 757439a36b8SEric Ren */ 758439a36b8SEric Ren static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, 759439a36b8SEric Ren struct ocfs2_lock_holder *oh) 760439a36b8SEric Ren { 761439a36b8SEric Ren INIT_LIST_HEAD(&oh->oh_list); 762439a36b8SEric Ren oh->oh_owner_pid = get_pid(task_pid(current)); 763439a36b8SEric Ren 764439a36b8SEric Ren spin_lock(&lockres->l_lock); 765439a36b8SEric Ren list_add_tail(&oh->oh_list, &lockres->l_holders); 766439a36b8SEric Ren spin_unlock(&lockres->l_lock); 767439a36b8SEric Ren } 768439a36b8SEric Ren 769439a36b8SEric Ren static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, 770439a36b8SEric Ren struct ocfs2_lock_holder *oh) 771439a36b8SEric Ren { 772439a36b8SEric Ren spin_lock(&lockres->l_lock); 773439a36b8SEric Ren list_del(&oh->oh_list); 774439a36b8SEric Ren spin_unlock(&lockres->l_lock); 775439a36b8SEric Ren 776439a36b8SEric Ren put_pid(oh->oh_owner_pid); 777439a36b8SEric Ren } 778439a36b8SEric Ren 779439a36b8SEric Ren static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) 780439a36b8SEric Ren { 781439a36b8SEric Ren struct ocfs2_lock_holder *oh; 782439a36b8SEric Ren struct pid *pid; 783439a36b8SEric Ren 784439a36b8SEric Ren /* look in the list of holders for one with the current task as owner */ 785439a36b8SEric Ren spin_lock(&lockres->l_lock); 786439a36b8SEric Ren pid = task_pid(current); 787439a36b8SEric Ren list_for_each_entry(oh, &lockres->l_holders, oh_list) { 788439a36b8SEric 
Ren if (oh->oh_owner_pid == pid) { 789439a36b8SEric Ren spin_unlock(&lockres->l_lock); 790439a36b8SEric Ren return 1; 791439a36b8SEric Ren } 792439a36b8SEric Ren } 793439a36b8SEric Ren spin_unlock(&lockres->l_lock); 794439a36b8SEric Ren 795439a36b8SEric Ren return 0; 796439a36b8SEric Ren } 797439a36b8SEric Ren 798ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 799ccd979bdSMark Fasheh int level) 800ccd979bdSMark Fasheh { 801ccd979bdSMark Fasheh BUG_ON(!lockres); 802ccd979bdSMark Fasheh 803ccd979bdSMark Fasheh switch(level) { 804bd3e7610SJoel Becker case DLM_LOCK_EX: 805ccd979bdSMark Fasheh lockres->l_ex_holders++; 806ccd979bdSMark Fasheh break; 807bd3e7610SJoel Becker case DLM_LOCK_PR: 808ccd979bdSMark Fasheh lockres->l_ro_holders++; 809ccd979bdSMark Fasheh break; 810ccd979bdSMark Fasheh default: 811ccd979bdSMark Fasheh BUG(); 812ccd979bdSMark Fasheh } 813ccd979bdSMark Fasheh } 814ccd979bdSMark Fasheh 815ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 816ccd979bdSMark Fasheh int level) 817ccd979bdSMark Fasheh { 818ccd979bdSMark Fasheh BUG_ON(!lockres); 819ccd979bdSMark Fasheh 820ccd979bdSMark Fasheh switch(level) { 821bd3e7610SJoel Becker case DLM_LOCK_EX: 822ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 823ccd979bdSMark Fasheh lockres->l_ex_holders--; 824ccd979bdSMark Fasheh break; 825bd3e7610SJoel Becker case DLM_LOCK_PR: 826ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 827ccd979bdSMark Fasheh lockres->l_ro_holders--; 828ccd979bdSMark Fasheh break; 829ccd979bdSMark Fasheh default: 830ccd979bdSMark Fasheh BUG(); 831ccd979bdSMark Fasheh } 832ccd979bdSMark Fasheh } 833ccd979bdSMark Fasheh 834ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 835ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 836ccd979bdSMark Fasheh * lock types are added. 
*/ 837ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 838ccd979bdSMark Fasheh { 839bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 840ccd979bdSMark Fasheh 841bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 842bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 843bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 844bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 845ccd979bdSMark Fasheh return new_level; 846ccd979bdSMark Fasheh } 847ccd979bdSMark Fasheh 848ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 849ccd979bdSMark Fasheh unsigned long newflags) 850ccd979bdSMark Fasheh { 851800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 852ccd979bdSMark Fasheh 853ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 854ccd979bdSMark Fasheh 855ccd979bdSMark Fasheh lockres->l_flags = newflags; 856ccd979bdSMark Fasheh 857800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 858ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 859ccd979bdSMark Fasheh continue; 860ccd979bdSMark Fasheh 861ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 862ccd979bdSMark Fasheh mw->mw_status = 0; 863ccd979bdSMark Fasheh complete(&mw->mw_complete); 864ccd979bdSMark Fasheh } 865ccd979bdSMark Fasheh } 866ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 867ccd979bdSMark Fasheh { 868ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 869ccd979bdSMark Fasheh } 870ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 871ccd979bdSMark Fasheh unsigned long clear) 872ccd979bdSMark Fasheh { 873ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 874ccd979bdSMark Fasheh } 875ccd979bdSMark Fasheh 876ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 877ccd979bdSMark Fasheh { 
878ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 879ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 880ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 881bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 882ccd979bdSMark Fasheh 883ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 884ccd979bdSMark Fasheh if (lockres->l_level <= 885ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 886bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 887ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 888ccd979bdSMark Fasheh } 889ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 890ccd979bdSMark Fasheh } 891ccd979bdSMark Fasheh 892ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 893ccd979bdSMark Fasheh { 894ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 895ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 896ccd979bdSMark Fasheh 897ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 898ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 899ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 900ccd979bdSMark Fasheh * update */ 901bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 902f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 903ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 904ccd979bdSMark Fasheh 905ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 906a1912826SSunil Mushran 907a1912826SSunil Mushran /* 908a1912826SSunil Mushran * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 909a1912826SSunil Mushran * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 910a1912826SSunil Mushran * downconverting the lock before the upconvert has fully completed. 911d1e78238SXue jiufei * Do not prevent the dc thread from downconverting if NONBLOCK lock 912d1e78238SXue jiufei * had already returned. 913a1912826SSunil Mushran */ 914d1e78238SXue jiufei if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED)) 915a1912826SSunil Mushran lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 916d1e78238SXue jiufei else 917d1e78238SXue jiufei lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED); 918a1912826SSunil Mushran 919ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 920ccd979bdSMark Fasheh } 921ccd979bdSMark Fasheh 922ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 923ccd979bdSMark Fasheh { 9243cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 925ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 926ccd979bdSMark Fasheh 927bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 928f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 929f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 930ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 931ccd979bdSMark Fasheh 932ccd979bdSMark Fasheh 
lockres->l_level = lockres->l_requested; 933ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 934ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 935ccd979bdSMark Fasheh } 936ccd979bdSMark Fasheh 937ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 938ccd979bdSMark Fasheh int level) 939ccd979bdSMark Fasheh { 940ccd979bdSMark Fasheh int needs_downconvert = 0; 941ccd979bdSMark Fasheh 942ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 943ccd979bdSMark Fasheh 944ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 945ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 946ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 947ccd979bdSMark Fasheh * blocking. this also catches the case where we get 948ccd979bdSMark Fasheh * duplicate BASTs */ 949ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 950ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 951ccd979bdSMark Fasheh needs_downconvert = 1; 952ccd979bdSMark Fasheh 953ccd979bdSMark Fasheh lockres->l_blocking = level; 954ccd979bdSMark Fasheh } 955ccd979bdSMark Fasheh 9569b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", 9579b915181SSunil Mushran lockres->l_name, level, lockres->l_level, lockres->l_blocking, 9589b915181SSunil Mushran needs_downconvert); 9599b915181SSunil Mushran 9600b94a909SWengang Wang if (needs_downconvert) 9610b94a909SWengang Wang lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 962c1e8d35eSTao Ma mlog(0, "needs_downconvert = %d\n", needs_downconvert); 963ccd979bdSMark Fasheh return needs_downconvert; 964ccd979bdSMark Fasheh } 965ccd979bdSMark Fasheh 966de551246SJoel Becker /* 967de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 968de551246SJoel Becker * 969de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? 
To close a race between setting 970de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 971de551246SJoel Becker * for more details on the race. 972de551246SJoel Becker * 973de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 974de551246SJoel Becker * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 975de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 976de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 977de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 978de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 979de551246SJoel Becker * nothing. 980de551246SJoel Becker * 981de551246SJoel Becker * But what if another path (eg downconvert thread) has just started a 982de551246SJoel Becker * new locking action? The other path has re-set PENDING. Our path 983de551246SJoel Becker * cannot clear PENDING, because that will re-open the original race 984de551246SJoel Becker * window. 
985de551246SJoel Becker * 986de551246SJoel Becker * [Example] 987de551246SJoel Becker * 988de551246SJoel Becker * ocfs2_meta_lock() 989de551246SJoel Becker * ocfs2_cluster_lock() 990de551246SJoel Becker * set BUSY 991de551246SJoel Becker * set PENDING 992de551246SJoel Becker * drop l_lock 993de551246SJoel Becker * ocfs2_dlm_lock() 994de551246SJoel Becker * ocfs2_locking_ast() ocfs2_downconvert_thread() 995de551246SJoel Becker * clear PENDING ocfs2_unblock_lock() 996de551246SJoel Becker * take_l_lock 997de551246SJoel Becker * !BUSY 998de551246SJoel Becker * ocfs2_prepare_downconvert() 999de551246SJoel Becker * set BUSY 1000de551246SJoel Becker * set PENDING 1001de551246SJoel Becker * drop l_lock 1002de551246SJoel Becker * take l_lock 1003de551246SJoel Becker * clear PENDING 1004de551246SJoel Becker * drop l_lock 1005de551246SJoel Becker * <window> 1006de551246SJoel Becker * ocfs2_dlm_lock() 1007de551246SJoel Becker * 1008de551246SJoel Becker * So as you can see, we now have a window where l_lock is not held, 1009de551246SJoel Becker * PENDING is not set, and ocfs2_dlm_lock() has not been called. 1010de551246SJoel Becker * 1011de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 1012de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 1013de551246SJoel Becker * 1014de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 1015de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 1016de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 1017de551246SJoel Becker * current generation number. When ocfs2_cluster_lock() goes to clear 1018de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 1019de551246SJoel Becker * example above, the generation numbers will *not* match. 
Thus, 1020de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 1021de551246SJoel Becker * ocfs2_prepare_downconvert(). 1022de551246SJoel Becker */ 1023de551246SJoel Becker 1024de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 1025de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 1026de551246SJoel Becker unsigned int generation, 1027de551246SJoel Becker struct ocfs2_super *osb) 1028de551246SJoel Becker { 1029de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1030de551246SJoel Becker 1031de551246SJoel Becker /* 1032de551246SJoel Becker * The ast and locking functions can race us here. The winner 1033de551246SJoel Becker * will clear pending, the loser will not. 1034de551246SJoel Becker */ 1035de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 1036de551246SJoel Becker (lockres->l_pending_gen != generation)) 1037de551246SJoel Becker return; 1038de551246SJoel Becker 1039de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 1040de551246SJoel Becker lockres->l_pending_gen++; 1041de551246SJoel Becker 1042de551246SJoel Becker /* 1043de551246SJoel Becker * The downconvert thread may have skipped us because we 1044de551246SJoel Becker * were PENDING. Wake it up. 
1045de551246SJoel Becker */ 1046de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1047de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 1048de551246SJoel Becker } 1049de551246SJoel Becker 1050de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 1051de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1052de551246SJoel Becker unsigned int generation, 1053de551246SJoel Becker struct ocfs2_super *osb) 1054de551246SJoel Becker { 1055de551246SJoel Becker unsigned long flags; 1056de551246SJoel Becker 1057de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1058de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 1059de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1060de551246SJoel Becker } 1061de551246SJoel Becker 1062de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1063de551246SJoel Becker { 1064de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1065de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1066de551246SJoel Becker 1067de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1068de551246SJoel Becker 1069de551246SJoel Becker return lockres->l_pending_gen; 1070de551246SJoel Becker } 1071de551246SJoel Becker 1072c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 1073ccd979bdSMark Fasheh { 1074a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1075aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1076ccd979bdSMark Fasheh int needs_downconvert; 1077ccd979bdSMark Fasheh unsigned long flags; 1078ccd979bdSMark Fasheh 1079bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 1080ccd979bdSMark Fasheh 10819b915181SSunil Mushran mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " 10829b915181SSunil Mushran "type %s\n", lockres->l_name, level, 
lockres->l_level, 1083aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 1084aa2623adSMark Fasheh 1085cf8e06f1SMark Fasheh /* 1086cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 1087cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 1088cf8e06f1SMark Fasheh */ 1089cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1090cf8e06f1SMark Fasheh return; 1091cf8e06f1SMark Fasheh 1092ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1093ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1094ccd979bdSMark Fasheh if (needs_downconvert) 1095ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 1096ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1097ccd979bdSMark Fasheh 1098d680efe9SMark Fasheh wake_up(&lockres->l_event); 1099d680efe9SMark Fasheh 110034d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1101ccd979bdSMark Fasheh } 1102ccd979bdSMark Fasheh 1103c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) 1104ccd979bdSMark Fasheh { 1105a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1106de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1107ccd979bdSMark Fasheh unsigned long flags; 11081693a5c0SDavid Teigland int status; 1109ccd979bdSMark Fasheh 1110ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1111ccd979bdSMark Fasheh 11121693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 11131693a5c0SDavid Teigland 11141693a5c0SDavid Teigland if (status == -EAGAIN) { 11151693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 11161693a5c0SDavid Teigland goto out; 11171693a5c0SDavid Teigland } 11181693a5c0SDavid Teigland 11191693a5c0SDavid Teigland if (status) { 11208f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 11211693a5c0SDavid Teigland 
lockres->l_name, status); 1122ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1123ccd979bdSMark Fasheh return; 1124ccd979bdSMark Fasheh } 1125ccd979bdSMark Fasheh 11269b915181SSunil Mushran mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " 11279b915181SSunil Mushran "level %d => %d\n", lockres->l_name, lockres->l_action, 11289b915181SSunil Mushran lockres->l_unlock_action, lockres->l_level, lockres->l_requested); 11299b915181SSunil Mushran 1130ccd979bdSMark Fasheh switch(lockres->l_action) { 1131ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1132ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1133e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1134ccd979bdSMark Fasheh break; 1135ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1136ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1137ccd979bdSMark Fasheh break; 1138ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1139ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1140ccd979bdSMark Fasheh break; 1141ccd979bdSMark Fasheh default: 11429b915181SSunil Mushran mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " 11439b915181SSunil Mushran "flags 0x%lx, unlock: %u\n", 1144e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1145e92d57dfSMark Fasheh lockres->l_unlock_action); 1146ccd979bdSMark Fasheh BUG(); 1147ccd979bdSMark Fasheh } 11481693a5c0SDavid Teigland out: 1149ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 1150ccd979bdSMark Fasheh * can catch it. */ 1151ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1152ccd979bdSMark Fasheh 1153de551246SJoel Becker /* Did we try to cancel this lock? 
Clear that state */ 1154de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1155de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1156de551246SJoel Becker 1157de551246SJoel Becker /* 1158de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1159de551246SJoel Becker * know that dlm_lock() has been called :-) 1160de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1161de551246SJoel Becker * can use lockres->l_pending_gen. 1162de551246SJoel Becker */ 1163de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1164de551246SJoel Becker 1165ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1166d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1167ccd979bdSMark Fasheh } 1168ccd979bdSMark Fasheh 1169553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) 1170553b5eb9SJoel Becker { 1171553b5eb9SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1172553b5eb9SJoel Becker unsigned long flags; 1173553b5eb9SJoel Becker 11749b915181SSunil Mushran mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 11759b915181SSunil Mushran lockres->l_name, lockres->l_unlock_action); 1176553b5eb9SJoel Becker 1177553b5eb9SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1178553b5eb9SJoel Becker if (error) { 1179553b5eb9SJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 1180553b5eb9SJoel Becker "unlock_action %d\n", error, lockres->l_name, 1181553b5eb9SJoel Becker lockres->l_unlock_action); 1182553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1183553b5eb9SJoel Becker return; 1184553b5eb9SJoel Becker } 1185553b5eb9SJoel Becker 1186553b5eb9SJoel Becker switch(lockres->l_unlock_action) { 1187553b5eb9SJoel Becker case OCFS2_UNLOCK_CANCEL_CONVERT: 1188553b5eb9SJoel Becker mlog(0, "Cancel convert success for %s\n", lockres->l_name); 
1189553b5eb9SJoel Becker lockres->l_action = OCFS2_AST_INVALID; 1190553b5eb9SJoel Becker /* Downconvert thread may have requeued this lock, we 1191553b5eb9SJoel Becker * need to wake it. */ 1192553b5eb9SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1193553b5eb9SJoel Becker ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 1194553b5eb9SJoel Becker break; 1195553b5eb9SJoel Becker case OCFS2_UNLOCK_DROP_LOCK: 1196553b5eb9SJoel Becker lockres->l_level = DLM_LOCK_IV; 1197553b5eb9SJoel Becker break; 1198553b5eb9SJoel Becker default: 1199553b5eb9SJoel Becker BUG(); 1200553b5eb9SJoel Becker } 1201553b5eb9SJoel Becker 1202553b5eb9SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1203553b5eb9SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1204553b5eb9SJoel Becker wake_up(&lockres->l_event); 1205553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1206553b5eb9SJoel Becker } 1207553b5eb9SJoel Becker 1208553b5eb9SJoel Becker /* 1209553b5eb9SJoel Becker * This is the filesystem locking protocol. It provides the lock handling 1210553b5eb9SJoel Becker * hooks for the underlying DLM. It has a maximum version number. 1211553b5eb9SJoel Becker * The version number allows interoperability with systems running at 1212553b5eb9SJoel Becker * the same major number and an equal or smaller minor number. 1213553b5eb9SJoel Becker * 1214553b5eb9SJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 1215553b5eb9SJoel Becker * lock, orders them differently, does different things underneath a lock), 1216553b5eb9SJoel Becker * the version must be changed. The protocol is negotiated when joining 1217553b5eb9SJoel Becker * the dlm domain. A node may join the domain if its major version is 1218553b5eb9SJoel Becker * identical to all other nodes and its minor version is greater than 1219553b5eb9SJoel Becker * or equal to all other nodes. 
When its minor version is greater than 1220553b5eb9SJoel Becker * the other nodes, it will run at the minor version specified by the 1221553b5eb9SJoel Becker * other nodes. 1222553b5eb9SJoel Becker * 1223553b5eb9SJoel Becker * If a locking change is made that will not be compatible with older 1224553b5eb9SJoel Becker * versions, the major number must be increased and the minor version set 1225553b5eb9SJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 1226553b5eb9SJoel Becker * speaking to older versions, the minor version must be increased. If a 1227553b5eb9SJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 1228553b5eb9SJoel Becker * are just ignored by older versions), the version does not need to be 1229553b5eb9SJoel Becker * updated. 1230553b5eb9SJoel Becker */ 1231553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = { 1232553b5eb9SJoel Becker .lp_max_version = { 1233553b5eb9SJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 1234553b5eb9SJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 1235553b5eb9SJoel Becker }, 1236553b5eb9SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 1237553b5eb9SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 1238553b5eb9SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 1239553b5eb9SJoel Becker }; 1240553b5eb9SJoel Becker 1241553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void) 1242553b5eb9SJoel Becker { 1243553b5eb9SJoel Becker ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version); 1244553b5eb9SJoel Becker } 1245553b5eb9SJoel Becker 1246ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1247ccd979bdSMark Fasheh int convert) 1248ccd979bdSMark Fasheh { 1249ccd979bdSMark Fasheh unsigned long flags; 1250ccd979bdSMark Fasheh 1251ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1252ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1253a1912826SSunil Mushran 
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1254ccd979bdSMark Fasheh if (convert) 1255ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1256ccd979bdSMark Fasheh else 1257ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1258ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1259ccd979bdSMark Fasheh 1260ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1261ccd979bdSMark Fasheh } 1262ccd979bdSMark Fasheh 1263ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1264ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1265ccd979bdSMark Fasheh * to do the right thing in that case. 1266ccd979bdSMark Fasheh */ 1267ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1268ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1269ccd979bdSMark Fasheh int level, 1270bd3e7610SJoel Becker u32 dlm_flags) 1271ccd979bdSMark Fasheh { 1272ccd979bdSMark Fasheh int ret = 0; 1273ccd979bdSMark Fasheh unsigned long flags; 1274de551246SJoel Becker unsigned int gen; 1275ccd979bdSMark Fasheh 1276bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1277ccd979bdSMark Fasheh dlm_flags); 1278ccd979bdSMark Fasheh 1279ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1280ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1281ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1282ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1283ccd979bdSMark Fasheh goto bail; 1284ccd979bdSMark Fasheh } 1285ccd979bdSMark Fasheh 1286ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1287ccd979bdSMark Fasheh lockres->l_requested = level; 1288ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1289de551246SJoel Becker gen = lockres_set_pending(lockres); 1290ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 
1291ccd979bdSMark Fasheh 12924670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1293ccd979bdSMark Fasheh level, 1294ccd979bdSMark Fasheh &lockres->l_lksb, 1295ccd979bdSMark Fasheh dlm_flags, 1296ccd979bdSMark Fasheh lockres->l_name, 1297a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1298de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 12997431cd7eSJoel Becker if (ret) { 13007431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1301ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1302ccd979bdSMark Fasheh } 1303ccd979bdSMark Fasheh 13047431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1305ccd979bdSMark Fasheh 1306ccd979bdSMark Fasheh bail: 1307ccd979bdSMark Fasheh return ret; 1308ccd979bdSMark Fasheh } 1309ccd979bdSMark Fasheh 1310ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1311ccd979bdSMark Fasheh int flag) 1312ccd979bdSMark Fasheh { 1313ccd979bdSMark Fasheh unsigned long flags; 1314ccd979bdSMark Fasheh int ret; 1315ccd979bdSMark Fasheh 1316ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1317ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1318ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1319ccd979bdSMark Fasheh 1320ccd979bdSMark Fasheh return ret; 1321ccd979bdSMark Fasheh } 1322ccd979bdSMark Fasheh 1323ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1324ccd979bdSMark Fasheh 1325ccd979bdSMark Fasheh { 1326ccd979bdSMark Fasheh wait_event(lockres->l_event, 1327ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1328ccd979bdSMark Fasheh } 1329ccd979bdSMark Fasheh 1330ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1331ccd979bdSMark Fasheh 1332ccd979bdSMark Fasheh { 1333ccd979bdSMark Fasheh wait_event(lockres->l_event, 1334ccd979bdSMark Fasheh 
!ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1335ccd979bdSMark Fasheh } 1336ccd979bdSMark Fasheh 1337ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1338ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1339ccd979bdSMark Fasheh * level will be compatible with it. */ 1340ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1341ccd979bdSMark Fasheh int wanted) 1342ccd979bdSMark Fasheh { 1343ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1344ccd979bdSMark Fasheh 1345ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1346ccd979bdSMark Fasheh } 1347ccd979bdSMark Fasheh 1348ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1349ccd979bdSMark Fasheh { 1350ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1351ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 13528ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1353ccd979bdSMark Fasheh } 1354ccd979bdSMark Fasheh 1355ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1356ccd979bdSMark Fasheh { 1357ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1358ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 135916735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1360ccd979bdSMark Fasheh return mw->mw_status; 1361ccd979bdSMark Fasheh } 1362ccd979bdSMark Fasheh 1363ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1364ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1365ccd979bdSMark Fasheh unsigned long mask, 1366ccd979bdSMark Fasheh unsigned long goal) 1367ccd979bdSMark Fasheh { 1368ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1369ccd979bdSMark Fasheh 1370ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1371ccd979bdSMark Fasheh 1372ccd979bdSMark Fasheh 
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1373ccd979bdSMark Fasheh mw->mw_mask = mask; 1374ccd979bdSMark Fasheh mw->mw_goal = goal; 1375ccd979bdSMark Fasheh } 1376ccd979bdSMark Fasheh 1377ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1378ccd979bdSMark Fasheh * if the mask still hadn't reached its goal */ 1379d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1380ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1381ccd979bdSMark Fasheh { 1382ccd979bdSMark Fasheh int ret = 0; 1383ccd979bdSMark Fasheh 1384d1e78238SXue jiufei assert_spin_locked(&lockres->l_lock); 1385ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1386ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1387ccd979bdSMark Fasheh ret = -EBUSY; 1388ccd979bdSMark Fasheh 1389ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1390ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1391ccd979bdSMark Fasheh } 1392d1e78238SXue jiufei 1393d1e78238SXue jiufei return ret; 1394d1e78238SXue jiufei } 1395d1e78238SXue jiufei 1396d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1397d1e78238SXue jiufei struct ocfs2_mask_waiter *mw) 1398d1e78238SXue jiufei { 1399d1e78238SXue jiufei unsigned long flags; 1400d1e78238SXue jiufei int ret = 0; 1401d1e78238SXue jiufei 1402d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1403d1e78238SXue jiufei ret = __lockres_remove_mask_waiter(lockres, mw); 1404ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1405ccd979bdSMark Fasheh 1406ccd979bdSMark Fasheh return ret; 1407ccd979bdSMark Fasheh 1408ccd979bdSMark Fasheh } 1409ccd979bdSMark Fasheh 1410cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1411cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1412cf8e06f1SMark Fasheh { 1413cf8e06f1SMark Fasheh int ret; 1414cf8e06f1SMark Fasheh 
1415cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1416cf8e06f1SMark Fasheh if (ret) 1417cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1418cf8e06f1SMark Fasheh else 1419cf8e06f1SMark Fasheh ret = mw->mw_status; 1420cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 142116735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1422cf8e06f1SMark Fasheh return ret; 1423cf8e06f1SMark Fasheh } 1424cf8e06f1SMark Fasheh 1425cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1426ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1427ccd979bdSMark Fasheh int level, 1428bd3e7610SJoel Becker u32 lkm_flags, 1429cb25797dSJan Kara int arg_flags, 1430cb25797dSJan Kara int l_subclass, 1431cb25797dSJan Kara unsigned long caller_ip) 1432ccd979bdSMark Fasheh { 1433ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 1434ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1435ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1436ccd979bdSMark Fasheh unsigned long flags; 1437de551246SJoel Becker unsigned int gen; 14381693a5c0SDavid Teigland int noqueue_attempted = 0; 1439d1e78238SXue jiufei int dlm_locked = 0; 1440b1b1e15eSTariq Saeed int kick_dc = 0; 1441ccd979bdSMark Fasheh 14422f2eca20Salex chen if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { 14432f2eca20Salex chen mlog_errno(-EINVAL); 14442f2eca20Salex chen return -EINVAL; 14452f2eca20Salex chen } 14462f2eca20Salex chen 1447ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1448ccd979bdSMark Fasheh 1449b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1450bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1451b80fc012SMark Fasheh 1452ccd979bdSMark Fasheh again: 1453ccd979bdSMark Fasheh wait = 0; 1454ccd979bdSMark Fasheh 1455a1912826SSunil Mushran spin_lock_irqsave(&lockres->l_lock, flags); 1456a1912826SSunil Mushran 
1457ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1458ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1459a1912826SSunil Mushran goto unlock; 1460ccd979bdSMark Fasheh } 1461ccd979bdSMark Fasheh 1462ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1463ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1464ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1465ccd979bdSMark Fasheh 1466ccd979bdSMark Fasheh /* We only compare against the currently granted level 1467ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1468ccd979bdSMark Fasheh * we'll get caught below. */ 1469ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1470ccd979bdSMark Fasheh level > lockres->l_level) { 1471ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1472ccd979bdSMark Fasheh * them. */ 1473ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1474ccd979bdSMark Fasheh wait = 1; 1475ccd979bdSMark Fasheh goto unlock; 1476ccd979bdSMark Fasheh } 1477ccd979bdSMark Fasheh 1478a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1479a1912826SSunil Mushran /* 1480a1912826SSunil Mushran * We've upconverted. If the lock now has a level we can 1481a1912826SSunil Mushran * work with, we take it. If, however, the lock is not at the 1482a1912826SSunil Mushran * required level, we go thru the full cycle. One way this could 1483a1912826SSunil Mushran * happen is if a process requesting an upconvert to PR is 1484a1912826SSunil Mushran * closely followed by another requesting upconvert to an EX. 1485a1912826SSunil Mushran * If the process requesting EX lands here, we want it to 1486a1912826SSunil Mushran * continue attempting to upconvert and let the process 1487a1912826SSunil Mushran * requesting PR take the lock. 
1488a1912826SSunil Mushran * If multiple processes request upconvert to PR, the first one 1489a1912826SSunil Mushran * here will take the lock. The others will have to go thru the 1490a1912826SSunil Mushran * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1491a1912826SSunil Mushran * downconvert request. 1492a1912826SSunil Mushran */ 1493a1912826SSunil Mushran if (level <= lockres->l_level) 1494a1912826SSunil Mushran goto update_holders; 1495a1912826SSunil Mushran } 1496a1912826SSunil Mushran 1497ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1498ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1499ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1500ccd979bdSMark Fasheh * another node */ 1501ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1502ccd979bdSMark Fasheh wait = 1; 1503ccd979bdSMark Fasheh goto unlock; 1504ccd979bdSMark Fasheh } 1505ccd979bdSMark Fasheh 1506ccd979bdSMark Fasheh if (level > lockres->l_level) { 15071693a5c0SDavid Teigland if (noqueue_attempted > 0) { 15081693a5c0SDavid Teigland ret = -EAGAIN; 15091693a5c0SDavid Teigland goto unlock; 15101693a5c0SDavid Teigland } 15111693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 15121693a5c0SDavid Teigland noqueue_attempted = 1; 15131693a5c0SDavid Teigland 1514ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1515ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1516ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1517ccd979bdSMark Fasheh 1518019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1519019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1520bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1521019d1b22SMark Fasheh } else { 1522ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1523bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1524019d1b22SMark Fasheh } 1525019d1b22SMark Fasheh 
1526ccd979bdSMark Fasheh lockres->l_requested = level; 1527ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1528de551246SJoel Becker gen = lockres_set_pending(lockres); 1529ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1530ccd979bdSMark Fasheh 1531bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_IV); 1532bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1533ccd979bdSMark Fasheh 15349b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1535ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1536ccd979bdSMark Fasheh 1537ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 15384670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1539ccd979bdSMark Fasheh level, 1540ccd979bdSMark Fasheh &lockres->l_lksb, 1541019d1b22SMark Fasheh lkm_flags, 1542ccd979bdSMark Fasheh lockres->l_name, 1543a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1544de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 15457431cd7eSJoel Becker if (ret) { 15467431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 15477431cd7eSJoel Becker (ret != -EAGAIN)) { 154824ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 15497431cd7eSJoel Becker ret, lockres); 1550ccd979bdSMark Fasheh } 1551ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1552ccd979bdSMark Fasheh goto out; 1553ccd979bdSMark Fasheh } 1554d1e78238SXue jiufei dlm_locked = 1; 1555ccd979bdSMark Fasheh 155673ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1557ccd979bdSMark Fasheh lockres->l_name); 1558ccd979bdSMark Fasheh 1559ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1560ccd979bdSMark Fasheh * complete our work regardless. 
*/ 1561ccd979bdSMark Fasheh catch_signals = 0; 1562ccd979bdSMark Fasheh 1563ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1564ccd979bdSMark Fasheh goto again; 1565ccd979bdSMark Fasheh } 1566ccd979bdSMark Fasheh 1567a1912826SSunil Mushran update_holders: 1568ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. */ 1569ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1570ccd979bdSMark Fasheh 1571ccd979bdSMark Fasheh ret = 0; 1572ccd979bdSMark Fasheh unlock: 1573a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1574a1912826SSunil Mushran 1575b1b1e15eSTariq Saeed /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ 1576b1b1e15eSTariq Saeed kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); 1577b1b1e15eSTariq Saeed 1578ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1579b1b1e15eSTariq Saeed if (kick_dc) 1580b1b1e15eSTariq Saeed ocfs2_wake_downconvert_thread(osb); 1581ccd979bdSMark Fasheh out: 1582ccd979bdSMark Fasheh /* 1583ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1584ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1585ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1586ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1587ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1588ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 
1589ccd979bdSMark Fasheh */ 1590ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1591ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1592ccd979bdSMark Fasheh wait = 0; 1593d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1594d1e78238SXue jiufei if (__lockres_remove_mask_waiter(lockres, &mw)) { 1595d1e78238SXue jiufei if (dlm_locked) 1596d1e78238SXue jiufei lockres_or_flags(lockres, 1597d1e78238SXue jiufei OCFS2_LOCK_NONBLOCK_FINISHED); 1598d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1599ccd979bdSMark Fasheh ret = -EAGAIN; 1600d1e78238SXue jiufei } else { 1601d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1602ccd979bdSMark Fasheh goto again; 1603ccd979bdSMark Fasheh } 1604d1e78238SXue jiufei } 1605ccd979bdSMark Fasheh if (wait) { 1606ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1607ccd979bdSMark Fasheh if (ret == 0) 1608ccd979bdSMark Fasheh goto again; 1609ccd979bdSMark Fasheh mlog_errno(ret); 1610ccd979bdSMark Fasheh } 16118ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1612ccd979bdSMark Fasheh 1613cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1614cb25797dSJan Kara if (!ret && lockres->l_lockdep_map.key != NULL) { 1615cb25797dSJan Kara if (level == DLM_LOCK_PR) 1616cb25797dSJan Kara rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1617cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1618cb25797dSJan Kara caller_ip); 1619cb25797dSJan Kara else 1620cb25797dSJan Kara rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1621cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1622cb25797dSJan Kara caller_ip); 1623cb25797dSJan Kara } 1624cb25797dSJan Kara #endif 1625ccd979bdSMark Fasheh return ret; 1626ccd979bdSMark Fasheh } 1627ccd979bdSMark Fasheh 1628cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1629ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1630cb25797dSJan 
Kara int level, 1631cb25797dSJan Kara u32 lkm_flags, 1632cb25797dSJan Kara int arg_flags) 1633cb25797dSJan Kara { 1634cb25797dSJan Kara return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1635cb25797dSJan Kara 0, _RET_IP_); 1636cb25797dSJan Kara } 1637cb25797dSJan Kara 1638cb25797dSJan Kara 1639cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1640cb25797dSJan Kara struct ocfs2_lock_res *lockres, 1641cb25797dSJan Kara int level, 1642cb25797dSJan Kara unsigned long caller_ip) 1643ccd979bdSMark Fasheh { 1644ccd979bdSMark Fasheh unsigned long flags; 1645ccd979bdSMark Fasheh 1646ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1647ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 164834d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1649ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1650cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1651cb25797dSJan Kara if (lockres->l_lockdep_map.key != NULL) 1652cb25797dSJan Kara rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1653cb25797dSJan Kara #endif 1654ccd979bdSMark Fasheh } 1655ccd979bdSMark Fasheh 1656da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1657d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 165824c19ef4SMark Fasheh int ex, 165924c19ef4SMark Fasheh int local) 1660ccd979bdSMark Fasheh { 1661bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1662ccd979bdSMark Fasheh unsigned long flags; 1663bd3e7610SJoel Becker u32 lkm_flags = local ? 
DLM_LKF_LOCAL : 0; 1664ccd979bdSMark Fasheh 1665ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1666ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1667ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1668ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1669ccd979bdSMark Fasheh 167024c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1671ccd979bdSMark Fasheh } 1672ccd979bdSMark Fasheh 1673ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1674ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1675ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1676ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1677ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1678ccd979bdSMark Fasheh * with creating a new lock resource. */ 1679ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1680ccd979bdSMark Fasheh { 1681ccd979bdSMark Fasheh int ret; 1682d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1683ccd979bdSMark Fasheh 1684ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1685ccd979bdSMark Fasheh 1686b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1687ccd979bdSMark Fasheh 1688ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1689ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1690ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1691ccd979bdSMark Fasheh * about yet, there is no need to. 
As far as the LVB handling 1692ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1693ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1694ccd979bdSMark Fasheh * valid when we release the EX. */ 1695ccd979bdSMark Fasheh 169624c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1697ccd979bdSMark Fasheh if (ret) { 1698ccd979bdSMark Fasheh mlog_errno(ret); 1699ccd979bdSMark Fasheh goto bail; 1700ccd979bdSMark Fasheh } 1701ccd979bdSMark Fasheh 170224c19ef4SMark Fasheh /* 1703bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 170424c19ef4SMark Fasheh * don't use a generation in their lock names. 170524c19ef4SMark Fasheh */ 1706e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1707ccd979bdSMark Fasheh if (ret) { 1708ccd979bdSMark Fasheh mlog_errno(ret); 1709ccd979bdSMark Fasheh goto bail; 1710ccd979bdSMark Fasheh } 1711ccd979bdSMark Fasheh 171250008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 1713a8f24f1bSJoseph Qi if (ret) 171450008630STiger Yang mlog_errno(ret); 171550008630STiger Yang 1716ccd979bdSMark Fasheh bail: 1717ccd979bdSMark Fasheh return ret; 1718ccd979bdSMark Fasheh } 1719ccd979bdSMark Fasheh 1720ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1721ccd979bdSMark Fasheh { 1722ccd979bdSMark Fasheh int status, level; 1723ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1724c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1725ccd979bdSMark Fasheh 1726b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1727b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1728ccd979bdSMark Fasheh write ? 
"EXMODE" : "PRMODE"); 1729ccd979bdSMark Fasheh 1730c1e8d35eSTao Ma if (ocfs2_mount_local(osb)) 1731c271c5c2SSunil Mushran return 0; 1732c271c5c2SSunil Mushran 1733ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1734ccd979bdSMark Fasheh 1735bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1736ccd979bdSMark Fasheh 1737ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1738ccd979bdSMark Fasheh 0); 1739ccd979bdSMark Fasheh if (status < 0) 1740ccd979bdSMark Fasheh mlog_errno(status); 1741ccd979bdSMark Fasheh 1742ccd979bdSMark Fasheh return status; 1743ccd979bdSMark Fasheh } 1744ccd979bdSMark Fasheh 1745ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1746ccd979bdSMark Fasheh { 1747bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1748ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1749c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1750ccd979bdSMark Fasheh 1751b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1752b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1753ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1754ccd979bdSMark Fasheh 1755c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1756ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1757ccd979bdSMark Fasheh } 1758ccd979bdSMark Fasheh 175950008630STiger Yang /* 176050008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
176150008630STiger Yang */ 176250008630STiger Yang int ocfs2_open_lock(struct inode *inode) 176350008630STiger Yang { 176450008630STiger Yang int status = 0; 176550008630STiger Yang struct ocfs2_lock_res *lockres; 176650008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 176750008630STiger Yang 176850008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 176950008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 177050008630STiger Yang 177103efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 177250008630STiger Yang goto out; 177350008630STiger Yang 177450008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 177550008630STiger Yang 177650008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1777bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 177850008630STiger Yang if (status < 0) 177950008630STiger Yang mlog_errno(status); 178050008630STiger Yang 178150008630STiger Yang out: 178250008630STiger Yang return status; 178350008630STiger Yang } 178450008630STiger Yang 178550008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 178650008630STiger Yang { 178750008630STiger Yang int status = 0, level; 178850008630STiger Yang struct ocfs2_lock_res *lockres; 178950008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 179050008630STiger Yang 179150008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 179250008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 179350008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 179450008630STiger Yang 179503efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 179603efed8aSTiger Yang if (write) 179703efed8aSTiger Yang status = -EROFS; 179803efed8aSTiger Yang goto out; 179903efed8aSTiger Yang } 180003efed8aSTiger Yang 180150008630STiger Yang if (ocfs2_mount_local(osb)) 180250008630STiger Yang goto out; 180350008630STiger Yang 180450008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 180550008630STiger Yang 1806bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 180750008630STiger Yang 180850008630STiger Yang /* 180950008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1810bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 181150008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 181250008630STiger Yang * this inode is still in use. 181350008630STiger Yang */ 181450008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1815bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 181650008630STiger Yang 181750008630STiger Yang out: 181850008630STiger Yang return status; 181950008630STiger Yang } 182050008630STiger Yang 182150008630STiger Yang /* 182250008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
182350008630STiger Yang */ 182450008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 182550008630STiger Yang { 182650008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 182750008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 182850008630STiger Yang 182950008630STiger Yang mlog(0, "inode %llu drop open lock\n", 183050008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 183150008630STiger Yang 183250008630STiger Yang if (ocfs2_mount_local(osb)) 183350008630STiger Yang goto out; 183450008630STiger Yang 183550008630STiger Yang if(lockres->l_ro_holders) 183650008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1837bd3e7610SJoel Becker DLM_LOCK_PR); 183850008630STiger Yang if(lockres->l_ex_holders) 183950008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1840bd3e7610SJoel Becker DLM_LOCK_EX); 184150008630STiger Yang 184250008630STiger Yang out: 1843c1e8d35eSTao Ma return; 184450008630STiger Yang } 184550008630STiger Yang 1846cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1847cf8e06f1SMark Fasheh int level) 1848cf8e06f1SMark Fasheh { 1849cf8e06f1SMark Fasheh int ret; 1850cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1851cf8e06f1SMark Fasheh unsigned long flags; 1852cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1853cf8e06f1SMark Fasheh 1854cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1855cf8e06f1SMark Fasheh 1856cf8e06f1SMark Fasheh retry_cancel: 1857cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1858cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1859cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1860cf8e06f1SMark Fasheh if (ret) { 1861cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1862cf8e06f1SMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 1863cf8e06f1SMark Fasheh if (ret < 0) { 
1864cf8e06f1SMark Fasheh mlog_errno(ret); 1865cf8e06f1SMark Fasheh goto out; 1866cf8e06f1SMark Fasheh } 1867cf8e06f1SMark Fasheh goto retry_cancel; 1868cf8e06f1SMark Fasheh } 1869cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1870cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1871cf8e06f1SMark Fasheh 1872cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1873cf8e06f1SMark Fasheh goto retry_cancel; 1874cf8e06f1SMark Fasheh } 1875cf8e06f1SMark Fasheh 1876cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1877cf8e06f1SMark Fasheh /* 1878cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1879cf8e06f1SMark Fasheh * point to restarting the syscall. 1880cf8e06f1SMark Fasheh */ 1881cf8e06f1SMark Fasheh if (lockres->l_level == level) 1882cf8e06f1SMark Fasheh ret = 0; 1883cf8e06f1SMark Fasheh 1884cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1885cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1886cf8e06f1SMark Fasheh 1887cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1888cf8e06f1SMark Fasheh 1889cf8e06f1SMark Fasheh out: 1890cf8e06f1SMark Fasheh return ret; 1891cf8e06f1SMark Fasheh } 1892cf8e06f1SMark Fasheh 1893cf8e06f1SMark Fasheh /* 1894cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1895cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1896cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 18973ad2f3fbSDaniel Mack * separate path to the "low-level" dlm calls. In particular: 1898cf8e06f1SMark Fasheh * 1899cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1900cf8e06f1SMark Fasheh * what's been requested. 1901cf8e06f1SMark Fasheh * 1902cf8e06f1SMark Fasheh * - No lock caching is employed. 
We immediately downconvert to 1903cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1904cf8e06f1SMark Fasheh * the blocking list). 1905cf8e06f1SMark Fasheh * 1906cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1907cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1908cf8e06f1SMark Fasheh * request. 1909cf8e06f1SMark Fasheh * 1910cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1911cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1912cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1913cf8e06f1SMark Fasheh */ 1914cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1915cf8e06f1SMark Fasheh { 1916e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1917e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1918cf8e06f1SMark Fasheh unsigned long flags; 1919cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1920cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1921cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1922cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1923cf8e06f1SMark Fasheh 1924cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1925cf8e06f1SMark Fasheh 1926cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1927bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1928cf8e06f1SMark Fasheh mlog(ML_ERROR, 1929cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1930cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1931cf8e06f1SMark Fasheh lockres->l_level); 1932cf8e06f1SMark Fasheh return -EINVAL; 1933cf8e06f1SMark Fasheh } 1934cf8e06f1SMark Fasheh 1935cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1936cf8e06f1SMark Fasheh if 
(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1937cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1938cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1939cf8e06f1SMark Fasheh 1940cf8e06f1SMark Fasheh /* 1941cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1942cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 1943cf8e06f1SMark Fasheh */ 1944e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1945cf8e06f1SMark Fasheh if (ret < 0) { 1946cf8e06f1SMark Fasheh mlog_errno(ret); 1947cf8e06f1SMark Fasheh goto out; 1948cf8e06f1SMark Fasheh } 1949cf8e06f1SMark Fasheh 1950cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1951cf8e06f1SMark Fasheh if (ret) { 1952cf8e06f1SMark Fasheh mlog_errno(ret); 1953cf8e06f1SMark Fasheh goto out; 1954cf8e06f1SMark Fasheh } 1955cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1956cf8e06f1SMark Fasheh } 1957cf8e06f1SMark Fasheh 1958cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1959e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1960cf8e06f1SMark Fasheh lockres->l_requested = level; 1961cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1962cf8e06f1SMark Fasheh 1963cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1964cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1965cf8e06f1SMark Fasheh 19664670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1967a796d286SJoel Becker lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); 19687431cd7eSJoel Becker if (ret) { 19697431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 197024ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1971cf8e06f1SMark Fasheh ret = -EINVAL; 1972cf8e06f1SMark Fasheh } 1973cf8e06f1SMark Fasheh 1974cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1975cf8e06f1SMark Fasheh 
lockres_remove_mask_waiter(lockres, &mw); 1976cf8e06f1SMark Fasheh goto out; 1977cf8e06f1SMark Fasheh } 1978cf8e06f1SMark Fasheh 1979cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1980cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1981cf8e06f1SMark Fasheh /* 1982cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1983cf8e06f1SMark Fasheh * flock(). Current behavior locally is to allow the 1984cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1985cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1986cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1987cf8e06f1SMark Fasheh * reboot. 1988cf8e06f1SMark Fasheh * 1989cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1990cf8e06f1SMark Fasheh * though. We can't exit this function with an 1991cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1992cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1993cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1994af901ca1SAndré Goddard Rosa * to just bubble success back up to the user. 
1995cf8e06f1SMark Fasheh */ 1996cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 19971693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 19981693a5c0SDavid Teigland /* Trylock failed asynchronously */ 19991693a5c0SDavid Teigland BUG_ON(!trylock); 20001693a5c0SDavid Teigland ret = -EAGAIN; 2001cf8e06f1SMark Fasheh } 2002cf8e06f1SMark Fasheh 2003cf8e06f1SMark Fasheh out: 2004cf8e06f1SMark Fasheh 2005cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 2006cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 2007cf8e06f1SMark Fasheh return ret; 2008cf8e06f1SMark Fasheh } 2009cf8e06f1SMark Fasheh 2010cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 2011cf8e06f1SMark Fasheh { 2012cf8e06f1SMark Fasheh int ret; 2013de551246SJoel Becker unsigned int gen; 2014cf8e06f1SMark Fasheh unsigned long flags; 2015cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 2016cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 2017cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 2018cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 2019cf8e06f1SMark Fasheh 2020cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 2021cf8e06f1SMark Fasheh 2022cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 2023cf8e06f1SMark Fasheh return; 2024cf8e06f1SMark Fasheh 2025e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 2026cf8e06f1SMark Fasheh return; 2027cf8e06f1SMark Fasheh 2028cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 2029cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 2030cf8e06f1SMark Fasheh lockres->l_action); 2031cf8e06f1SMark Fasheh 2032cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2033cf8e06f1SMark Fasheh /* 2034cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 
2035cf8e06f1SMark Fasheh */ 2036cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 2037bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 2038cf8e06f1SMark Fasheh 2039e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 2040cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 2041cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2042cf8e06f1SMark Fasheh 2043e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 2044cf8e06f1SMark Fasheh if (ret) { 2045cf8e06f1SMark Fasheh mlog_errno(ret); 2046cf8e06f1SMark Fasheh return; 2047cf8e06f1SMark Fasheh } 2048cf8e06f1SMark Fasheh 2049cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 2050cf8e06f1SMark Fasheh if (ret) 2051cf8e06f1SMark Fasheh mlog_errno(ret); 2052cf8e06f1SMark Fasheh } 2053cf8e06f1SMark Fasheh 205434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 2055ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2056ccd979bdSMark Fasheh { 2057ccd979bdSMark Fasheh int kick = 0; 2058ccd979bdSMark Fasheh 2059ccd979bdSMark Fasheh /* If we know that another node is waiting on our lock, kick 206034d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 2061ccd979bdSMark Fasheh * condition. 
*/ 2062ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 2063ccd979bdSMark Fasheh switch(lockres->l_blocking) { 2064bd3e7610SJoel Becker case DLM_LOCK_EX: 2065ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 2066ccd979bdSMark Fasheh kick = 1; 2067ccd979bdSMark Fasheh break; 2068bd3e7610SJoel Becker case DLM_LOCK_PR: 2069ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 2070ccd979bdSMark Fasheh kick = 1; 2071ccd979bdSMark Fasheh break; 2072ccd979bdSMark Fasheh default: 2073ccd979bdSMark Fasheh BUG(); 2074ccd979bdSMark Fasheh } 2075ccd979bdSMark Fasheh } 2076ccd979bdSMark Fasheh 2077ccd979bdSMark Fasheh if (kick) 207834d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 2079ccd979bdSMark Fasheh } 2080ccd979bdSMark Fasheh 2081ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 2082ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 2083ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 2084ccd979bdSMark Fasheh 2085ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 2086ccd979bdSMark Fasheh * now. */ 2087ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 2088ccd979bdSMark Fasheh { 2089ccd979bdSMark Fasheh u64 res; 2090ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 2091ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 2092ccd979bdSMark Fasheh 2093ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 2094ccd979bdSMark Fasheh 2095ccd979bdSMark Fasheh return res; 2096ccd979bdSMark Fasheh } 2097ccd979bdSMark Fasheh 2098ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't 2099ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 2100e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. 
*/ 2101ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2102ccd979bdSMark Fasheh { 2103ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2104e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2105ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2106ccd979bdSMark Fasheh 2107a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2108ccd979bdSMark Fasheh 210924c19ef4SMark Fasheh /* 211024c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 211124c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 211224c19ef4SMark Fasheh * status. 211324c19ef4SMark Fasheh */ 211424c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 211524c19ef4SMark Fasheh lvb->lvb_version = 0; 211624c19ef4SMark Fasheh goto out; 211724c19ef4SMark Fasheh } 211824c19ef4SMark Fasheh 21194d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 2120ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2121ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 212203ab30f7SEric W. Biederman lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode)); 212303ab30f7SEric W. 
Biederman lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); 2124ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2125ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2126ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 2127ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2128ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 2129ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2130ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 2131ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2132ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 213315b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2134f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2135ccd979bdSMark Fasheh 213624c19ef4SMark Fasheh out: 2137ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2138ccd979bdSMark Fasheh } 2139ccd979bdSMark Fasheh 2140ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 2141ccd979bdSMark Fasheh u64 packed_time) 2142ccd979bdSMark Fasheh { 2143ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2144ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2145ccd979bdSMark Fasheh } 2146ccd979bdSMark Fasheh 2147ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2148ccd979bdSMark Fasheh { 2149ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2150e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2151ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2152ccd979bdSMark Fasheh 2153ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2154ccd979bdSMark Fasheh 2155a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2156ccd979bdSMark Fasheh 2157ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 2158ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2159ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2160ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2161ccd979bdSMark Fasheh 2162ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 216315b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2164ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 2165ca4d147eSHerbert Poetzl 2166ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 2167ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2168ccd979bdSMark Fasheh inode->i_blocks = 0; 2169ccd979bdSMark Fasheh else 21708110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 2171ccd979bdSMark Fasheh 217203ab30f7SEric W. Biederman i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid)); 217303ab30f7SEric W. Biederman i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); 2174ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2175bfe86848SMiklos Szeredi set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2176ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 2177ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 2178ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 2179ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 2180ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 2181ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 2182ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2183ccd979bdSMark Fasheh } 2184ccd979bdSMark Fasheh 2185f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2186f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 2187ccd979bdSMark Fasheh { 2188a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2189ccd979bdSMark Fasheh 21901c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 21911c520dfbSJoel Becker && lvb->lvb_version == OCFS2_LVB_VERSION 
2192f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2193ccd979bdSMark Fasheh return 1; 2194ccd979bdSMark Fasheh return 0; 2195ccd979bdSMark Fasheh } 2196ccd979bdSMark Fasheh 2197ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 2198ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 2199ccd979bdSMark Fasheh * 2200ccd979bdSMark Fasheh * 0 means no refresh needed. 2201ccd979bdSMark Fasheh * 2202ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 2203ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 2204ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2205ccd979bdSMark Fasheh { 2206ccd979bdSMark Fasheh unsigned long flags; 2207ccd979bdSMark Fasheh int status = 0; 2208ccd979bdSMark Fasheh 2209ccd979bdSMark Fasheh refresh_check: 2210ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2211ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2212ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2213ccd979bdSMark Fasheh goto bail; 2214ccd979bdSMark Fasheh } 2215ccd979bdSMark Fasheh 2216ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2217ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2218ccd979bdSMark Fasheh 2219ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 2220ccd979bdSMark Fasheh goto refresh_check; 2221ccd979bdSMark Fasheh } 2222ccd979bdSMark Fasheh 2223ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 2224ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2225ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2226ccd979bdSMark Fasheh 2227ccd979bdSMark Fasheh status = 1; 2228ccd979bdSMark Fasheh bail: 2229c1e8d35eSTao Ma mlog(0, "status %d\n", status); 2230ccd979bdSMark Fasheh return status; 2231ccd979bdSMark Fasheh } 2232ccd979bdSMark Fasheh 2233ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 2234ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 2235ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2236ccd979bdSMark Fasheh int status) 2237ccd979bdSMark Fasheh { 2238ccd979bdSMark Fasheh unsigned long flags; 2239ccd979bdSMark Fasheh 2240ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2241ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2242ccd979bdSMark Fasheh if (!status) 2243ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2244ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2245ccd979bdSMark Fasheh 2246ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2247ccd979bdSMark Fasheh } 2248ccd979bdSMark Fasheh 2249ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2250e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2251ccd979bdSMark Fasheh struct buffer_head **bh) 2252ccd979bdSMark Fasheh { 2253ccd979bdSMark Fasheh int status = 0; 2254ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2255e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2256ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2257c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2258ccd979bdSMark Fasheh 2259be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2260be9e986bSMark Fasheh goto bail; 2261be9e986bSMark Fasheh 2262ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2263ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2264b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2265ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2266b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2267ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2268ccd979bdSMark Fasheh status = -ENOENT; 2269ccd979bdSMark Fasheh goto bail; 2270ccd979bdSMark Fasheh } 2271ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2272ccd979bdSMark Fasheh 2273ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2274ccd979bdSMark Fasheh goto bail; 2275ccd979bdSMark Fasheh 2276ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2277ccd979bdSMark Fasheh * for the inode metadata. */ 22788cb471e8SJoel Becker ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2279ccd979bdSMark Fasheh 228083418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 228183418978SMark Fasheh 2282be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2283b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2284b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2285ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2286ccd979bdSMark Fasheh } else { 2287ccd979bdSMark Fasheh /* Boo, we have to go to disk. 
*/ 2288ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2289b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2290ccd979bdSMark Fasheh if (status < 0) { 2291ccd979bdSMark Fasheh mlog_errno(status); 2292ccd979bdSMark Fasheh goto bail_refresh; 2293ccd979bdSMark Fasheh } 2294ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2295ccd979bdSMark Fasheh 2296ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2297b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2298b657c95cSJoel Becker * already checked that the inode block is sane. 2299ccd979bdSMark Fasheh * 2300ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2301ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2302ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2303ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2304ccd979bdSMark Fasheh * locks associated with it. */ 2305ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2306ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2307b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2308ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2309b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2310b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2311ccd979bdSMark Fasheh inode->i_generation); 2312ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2313ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2314b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2315b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2316b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2317ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2318ccd979bdSMark Fasheh 2319ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 23208ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2321ccd979bdSMark Fasheh } 2322ccd979bdSMark Fasheh 
2323ccd979bdSMark Fasheh status = 0; 2324ccd979bdSMark Fasheh bail_refresh: 2325ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2326ccd979bdSMark Fasheh bail: 2327ccd979bdSMark Fasheh return status; 2328ccd979bdSMark Fasheh } 2329ccd979bdSMark Fasheh 2330ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2331ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2332ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2333ccd979bdSMark Fasheh { 2334ccd979bdSMark Fasheh int status; 2335ccd979bdSMark Fasheh 2336ccd979bdSMark Fasheh if (passed_bh) { 2337ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2338ccd979bdSMark Fasheh * returned bh. */ 2339ccd979bdSMark Fasheh *ret_bh = passed_bh; 2340ccd979bdSMark Fasheh get_bh(*ret_bh); 2341ccd979bdSMark Fasheh 2342ccd979bdSMark Fasheh return 0; 2343ccd979bdSMark Fasheh } 2344ccd979bdSMark Fasheh 2345b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2346ccd979bdSMark Fasheh if (status < 0) 2347ccd979bdSMark Fasheh mlog_errno(status); 2348ccd979bdSMark Fasheh 2349ccd979bdSMark Fasheh return status; 2350ccd979bdSMark Fasheh } 2351ccd979bdSMark Fasheh 2352ccd979bdSMark Fasheh /* 2353ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2354ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 
2355ccd979bdSMark Fasheh */ 2356cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode, 2357ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2358ccd979bdSMark Fasheh int ex, 2359cb25797dSJan Kara int arg_flags, 2360cb25797dSJan Kara int subclass) 2361ccd979bdSMark Fasheh { 2362bd3e7610SJoel Becker int status, level, acquired; 2363bd3e7610SJoel Becker u32 dlm_flags; 2364c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2365ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2366ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2367ccd979bdSMark Fasheh 2368b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2369b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2370ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2371ccd979bdSMark Fasheh 2372ccd979bdSMark Fasheh status = 0; 2373ccd979bdSMark Fasheh acquired = 0; 2374ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2375ccd979bdSMark Fasheh * rodevices. */ 2376ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2377ccd979bdSMark Fasheh if (ex) 2378ccd979bdSMark Fasheh status = -EROFS; 237903efed8aSTiger Yang goto getbh; 2380ccd979bdSMark Fasheh } 2381ccd979bdSMark Fasheh 2382439a36b8SEric Ren if ((arg_flags & OCFS2_META_LOCK_GETBH) || 2383439a36b8SEric Ren ocfs2_mount_local(osb)) 2384439a36b8SEric Ren goto update; 2385c271c5c2SSunil Mushran 2386ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2387553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2388ccd979bdSMark Fasheh 2389e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2390bd3e7610SJoel Becker level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2391ccd979bdSMark Fasheh dlm_flags = 0; 2392ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2393bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2394ccd979bdSMark Fasheh 2395cb25797dSJan Kara status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2396cb25797dSJan Kara arg_flags, subclass, _RET_IP_); 2397ccd979bdSMark Fasheh if (status < 0) { 239841003a7bSZach Brown if (status != -EAGAIN) 2399ccd979bdSMark Fasheh mlog_errno(status); 2400ccd979bdSMark Fasheh goto bail; 2401ccd979bdSMark Fasheh } 2402ccd979bdSMark Fasheh 2403ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2404ccd979bdSMark Fasheh acquired = 1; 2405ccd979bdSMark Fasheh 2406ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2407ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2408ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2409ccd979bdSMark Fasheh * abort the operation. */ 2410ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2411553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2412ccd979bdSMark Fasheh 2413439a36b8SEric Ren update: 241424c19ef4SMark Fasheh /* 241524c19ef4SMark Fasheh * We only see this flag if we're being called from 241624c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 241724c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 241824c19ef4SMark Fasheh * and let the caller handle it. 241924c19ef4SMark Fasheh */ 242024c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 242124c19ef4SMark Fasheh status = 0; 2422c271c5c2SSunil Mushran if (lockres) 242324c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 242424c19ef4SMark Fasheh goto bail; 242524c19ef4SMark Fasheh } 242624c19ef4SMark Fasheh 2427ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2428e63aecb6SMark Fasheh * not. 
ocfs2_inode_lock_update definitely wants one in, but 2429ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2430ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2431ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2432e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2433ccd979bdSMark Fasheh if (status < 0) { 2434ccd979bdSMark Fasheh if (status != -ENOENT) 2435ccd979bdSMark Fasheh mlog_errno(status); 2436ccd979bdSMark Fasheh goto bail; 2437ccd979bdSMark Fasheh } 243803efed8aSTiger Yang getbh: 2439ccd979bdSMark Fasheh if (ret_bh) { 2440ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2441ccd979bdSMark Fasheh if (status < 0) { 2442ccd979bdSMark Fasheh mlog_errno(status); 2443ccd979bdSMark Fasheh goto bail; 2444ccd979bdSMark Fasheh } 2445ccd979bdSMark Fasheh } 2446ccd979bdSMark Fasheh 2447ccd979bdSMark Fasheh bail: 2448ccd979bdSMark Fasheh if (status < 0) { 2449ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2450ccd979bdSMark Fasheh brelse(*ret_bh); 2451ccd979bdSMark Fasheh *ret_bh = NULL; 2452ccd979bdSMark Fasheh } 2453ccd979bdSMark Fasheh if (acquired) 2454e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2455ccd979bdSMark Fasheh } 2456ccd979bdSMark Fasheh 2457ccd979bdSMark Fasheh if (local_bh) 2458ccd979bdSMark Fasheh brelse(local_bh); 2459ccd979bdSMark Fasheh 2460ccd979bdSMark Fasheh return status; 2461ccd979bdSMark Fasheh } 2462ccd979bdSMark Fasheh 2463ccd979bdSMark Fasheh /* 246434d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 246534d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 246634d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 
2467ccd979bdSMark Fasheh * 2468ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2469ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2470ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2471ccd979bdSMark Fasheh * 247234d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 247334d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 247434d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 247534d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 247634d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 247734d024f8SMark Fasheh * immediately retry the aop call. 2478ccd979bdSMark Fasheh */ 2479e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2480ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2481ccd979bdSMark Fasheh int ex, 2482ccd979bdSMark Fasheh struct page *page) 2483ccd979bdSMark Fasheh { 2484ccd979bdSMark Fasheh int ret; 2485ccd979bdSMark Fasheh 2486e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2487ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2488ccd979bdSMark Fasheh unlock_page(page); 2489ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2490ccd979bdSMark Fasheh } 2491ccd979bdSMark Fasheh 2492ccd979bdSMark Fasheh return ret; 2493ccd979bdSMark Fasheh } 2494ccd979bdSMark Fasheh 2495e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 24967f1a37e3STiger Yang struct vfsmount *vfsmnt, 24977f1a37e3STiger Yang int *level) 24987f1a37e3STiger Yang { 24997f1a37e3STiger Yang int ret; 25007f1a37e3STiger Yang 2501e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 25027f1a37e3STiger Yang if (ret < 0) { 25037f1a37e3STiger Yang mlog_errno(ret); 25047f1a37e3STiger Yang return ret; 
25057f1a37e3STiger Yang } 25067f1a37e3STiger Yang 25077f1a37e3STiger Yang /* 25087f1a37e3STiger Yang * If we should update atime, we will get EX lock, 25097f1a37e3STiger Yang * otherwise we just get PR lock. 25107f1a37e3STiger Yang */ 25117f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 25127f1a37e3STiger Yang struct buffer_head *bh = NULL; 25137f1a37e3STiger Yang 2514e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2515e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 25167f1a37e3STiger Yang if (ret < 0) { 25177f1a37e3STiger Yang mlog_errno(ret); 25187f1a37e3STiger Yang return ret; 25197f1a37e3STiger Yang } 25207f1a37e3STiger Yang *level = 1; 25217f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 25227f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 25237f1a37e3STiger Yang if (bh) 25247f1a37e3STiger Yang brelse(bh); 25257f1a37e3STiger Yang } else 25267f1a37e3STiger Yang *level = 0; 25277f1a37e3STiger Yang 25287f1a37e3STiger Yang return ret; 25297f1a37e3STiger Yang } 25307f1a37e3STiger Yang 2531e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2532ccd979bdSMark Fasheh int ex) 2533ccd979bdSMark Fasheh { 2534bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2535e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2536c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2537ccd979bdSMark Fasheh 2538b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2539b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2540ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2541ccd979bdSMark Fasheh 2542c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2543c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2544ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2545ccd979bdSMark Fasheh } 2546ccd979bdSMark Fasheh 2547439a36b8SEric Ren /* 2548439a36b8SEric Ren * This _tracker variantes are introduced to deal with the recursive cluster 2549439a36b8SEric Ren * locking issue. The idea is to keep track of a lock holder on the stack of 2550439a36b8SEric Ren * the current process. If there's a lock holder on the stack, we know the 2551439a36b8SEric Ren * task context is already protected by cluster locking. Currently, they're 2552439a36b8SEric Ren * used in some VFS entry routines. 2553439a36b8SEric Ren * 2554439a36b8SEric Ren * return < 0 on error, return == 0 if there's no lock holder on the stack 2555439a36b8SEric Ren * before this call, return == 1 if this call would be a recursive locking. 
2556439a36b8SEric Ren */ 2557439a36b8SEric Ren int ocfs2_inode_lock_tracker(struct inode *inode, 2558439a36b8SEric Ren struct buffer_head **ret_bh, 2559439a36b8SEric Ren int ex, 2560439a36b8SEric Ren struct ocfs2_lock_holder *oh) 2561439a36b8SEric Ren { 2562439a36b8SEric Ren int status; 2563439a36b8SEric Ren int arg_flags = 0, has_locked; 2564439a36b8SEric Ren struct ocfs2_lock_res *lockres; 2565439a36b8SEric Ren 2566439a36b8SEric Ren lockres = &OCFS2_I(inode)->ip_inode_lockres; 2567439a36b8SEric Ren has_locked = ocfs2_is_locked_by_me(lockres); 2568439a36b8SEric Ren /* Just get buffer head if the cluster lock has been taken */ 2569439a36b8SEric Ren if (has_locked) 2570439a36b8SEric Ren arg_flags = OCFS2_META_LOCK_GETBH; 2571439a36b8SEric Ren 2572439a36b8SEric Ren if (likely(!has_locked || ret_bh)) { 2573439a36b8SEric Ren status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); 2574439a36b8SEric Ren if (status < 0) { 2575439a36b8SEric Ren if (status != -ENOENT) 2576439a36b8SEric Ren mlog_errno(status); 2577439a36b8SEric Ren return status; 2578439a36b8SEric Ren } 2579439a36b8SEric Ren } 2580439a36b8SEric Ren if (!has_locked) 2581439a36b8SEric Ren ocfs2_add_holder(lockres, oh); 2582439a36b8SEric Ren 2583439a36b8SEric Ren return has_locked; 2584439a36b8SEric Ren } 2585439a36b8SEric Ren 2586439a36b8SEric Ren void ocfs2_inode_unlock_tracker(struct inode *inode, 2587439a36b8SEric Ren int ex, 2588439a36b8SEric Ren struct ocfs2_lock_holder *oh, 2589439a36b8SEric Ren int had_lock) 2590439a36b8SEric Ren { 2591439a36b8SEric Ren struct ocfs2_lock_res *lockres; 2592439a36b8SEric Ren 2593439a36b8SEric Ren lockres = &OCFS2_I(inode)->ip_inode_lockres; 2594439a36b8SEric Ren if (!had_lock) { 2595439a36b8SEric Ren ocfs2_remove_holder(lockres, oh); 2596439a36b8SEric Ren ocfs2_inode_unlock(inode, ex); 2597439a36b8SEric Ren } 2598439a36b8SEric Ren } 2599439a36b8SEric Ren 2600df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 260183273932SSrinivas 
Eeda { 260283273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 260383273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 260483273932SSrinivas Eeda int status = 0; 260583273932SSrinivas Eeda 2606df152c24SSunil Mushran if (ocfs2_is_hard_readonly(osb)) 2607df152c24SSunil Mushran return -EROFS; 2608df152c24SSunil Mushran 2609df152c24SSunil Mushran if (ocfs2_mount_local(osb)) 2610df152c24SSunil Mushran return 0; 2611df152c24SSunil Mushran 261283273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 2613df152c24SSunil Mushran status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 261483273932SSrinivas Eeda if (status < 0) 261583273932SSrinivas Eeda return status; 261683273932SSrinivas Eeda 261783273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 26181c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 26191c520dfbSJoel Becker lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 262083273932SSrinivas Eeda *seqno = be32_to_cpu(lvb->lvb_os_seqno); 26213211949fSSunil Mushran else 26223211949fSSunil Mushran *seqno = osb->osb_orphan_scan.os_seqno + 1; 26233211949fSSunil Mushran 262483273932SSrinivas Eeda return status; 262583273932SSrinivas Eeda } 262683273932SSrinivas Eeda 2627df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 262883273932SSrinivas Eeda { 262983273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 263083273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 263183273932SSrinivas Eeda 2632df152c24SSunil Mushran if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { 263383273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 263483273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 263583273932SSrinivas Eeda lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 263683273932SSrinivas Eeda lvb->lvb_os_seqno = cpu_to_be32(seqno); 2637df152c24SSunil Mushran ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2638df152c24SSunil Mushran } 263983273932SSrinivas Eeda } 
264083273932SSrinivas Eeda 2641ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2642ccd979bdSMark Fasheh int ex) 2643ccd979bdSMark Fasheh { 2644c271c5c2SSunil Mushran int status = 0; 2645bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2646ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2647ccd979bdSMark Fasheh 2648ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2649ccd979bdSMark Fasheh return -EROFS; 2650ccd979bdSMark Fasheh 2651c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2652c271c5c2SSunil Mushran goto bail; 2653c271c5c2SSunil Mushran 2654ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2655ccd979bdSMark Fasheh if (status < 0) { 2656ccd979bdSMark Fasheh mlog_errno(status); 2657ccd979bdSMark Fasheh goto bail; 2658ccd979bdSMark Fasheh } 2659ccd979bdSMark Fasheh 2660ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2661ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2662ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2663ccd979bdSMark Fasheh * everything is up to the caller :) */ 2664ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2665ccd979bdSMark Fasheh if (status) { 26668e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2667ccd979bdSMark Fasheh 2668ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2669ccd979bdSMark Fasheh 26703278bb74SJunxiao Bi if (status < 0) { 26713278bb74SJunxiao Bi ocfs2_cluster_unlock(osb, lockres, level); 2672ccd979bdSMark Fasheh mlog_errno(status); 26733278bb74SJunxiao Bi } 26748ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2675ccd979bdSMark Fasheh } 2676ccd979bdSMark Fasheh bail: 2677ccd979bdSMark Fasheh return status; 2678ccd979bdSMark Fasheh } 2679ccd979bdSMark Fasheh 2680ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2681ccd979bdSMark Fasheh int ex) 2682ccd979bdSMark Fasheh { 2683bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2684ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2685ccd979bdSMark Fasheh 2686c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2687ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2688ccd979bdSMark Fasheh } 2689ccd979bdSMark Fasheh 2690ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2691ccd979bdSMark Fasheh { 2692ccd979bdSMark Fasheh int status; 2693ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2694ccd979bdSMark Fasheh 2695ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2696ccd979bdSMark Fasheh return -EROFS; 2697ccd979bdSMark Fasheh 2698c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2699c271c5c2SSunil Mushran return 0; 2700c271c5c2SSunil Mushran 2701bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2702ccd979bdSMark Fasheh if (status < 0) 2703ccd979bdSMark Fasheh mlog_errno(status); 2704ccd979bdSMark Fasheh 2705ccd979bdSMark Fasheh return status; 
2706ccd979bdSMark Fasheh } 2707ccd979bdSMark Fasheh 2708ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2709ccd979bdSMark Fasheh { 2710ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2711ccd979bdSMark Fasheh 2712c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2713bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2714ccd979bdSMark Fasheh } 2715ccd979bdSMark Fasheh 27166ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 27176ca497a8Swengang wang { 27186ca497a8Swengang wang int status; 27196ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 27206ca497a8Swengang wang 27216ca497a8Swengang wang if (ocfs2_is_hard_readonly(osb)) 27226ca497a8Swengang wang return -EROFS; 27236ca497a8Swengang wang 27246ca497a8Swengang wang if (ocfs2_mount_local(osb)) 27256ca497a8Swengang wang return 0; 27266ca497a8Swengang wang 27276ca497a8Swengang wang status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 27286ca497a8Swengang wang 0, 0); 27296ca497a8Swengang wang if (status < 0) 27306ca497a8Swengang wang mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 27316ca497a8Swengang wang 27326ca497a8Swengang wang return status; 27336ca497a8Swengang wang } 27346ca497a8Swengang wang 27356ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 27366ca497a8Swengang wang { 27376ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 27386ca497a8Swengang wang 27396ca497a8Swengang wang if (!ocfs2_mount_local(osb)) 27406ca497a8Swengang wang ocfs2_cluster_unlock(osb, lockres, 27416ca497a8Swengang wang ex ? LKM_EXMODE : LKM_PRMODE); 27426ca497a8Swengang wang } 27436ca497a8Swengang wang 2744d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2745d680efe9SMark Fasheh { 2746d680efe9SMark Fasheh int ret; 2747bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2748d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2749d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2750d680efe9SMark Fasheh 2751d680efe9SMark Fasheh BUG_ON(!dl); 2752d680efe9SMark Fasheh 275303efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 275403efed8aSTiger Yang if (ex) 2755d680efe9SMark Fasheh return -EROFS; 275603efed8aSTiger Yang return 0; 275703efed8aSTiger Yang } 2758d680efe9SMark Fasheh 2759c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2760c271c5c2SSunil Mushran return 0; 2761c271c5c2SSunil Mushran 2762d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2763d680efe9SMark Fasheh if (ret < 0) 2764d680efe9SMark Fasheh mlog_errno(ret); 2765d680efe9SMark Fasheh 2766d680efe9SMark Fasheh return ret; 2767d680efe9SMark Fasheh } 2768d680efe9SMark Fasheh 2769d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2770d680efe9SMark Fasheh { 2771bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2772d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2773d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2774d680efe9SMark Fasheh 277503efed8aSTiger Yang if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 2776d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2777d680efe9SMark Fasheh } 2778d680efe9SMark Fasheh 2779ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2780ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2781ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2782ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2783ccd979bdSMark Fasheh { 2784ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2785ccd979bdSMark Fasheh 2786ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2787ccd979bdSMark Fasheh 2788ccd979bdSMark Fasheh kfree(dlm_debug); 2789ccd979bdSMark Fasheh } 2790ccd979bdSMark Fasheh 2791ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2792ccd979bdSMark Fasheh { 2793ccd979bdSMark Fasheh if (dlm_debug) 2794ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2795ccd979bdSMark Fasheh } 2796ccd979bdSMark Fasheh 2797ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2798ccd979bdSMark Fasheh { 2799ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2800ccd979bdSMark Fasheh } 2801ccd979bdSMark Fasheh 2802ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2803ccd979bdSMark Fasheh { 2804ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2805ccd979bdSMark Fasheh 2806ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2807ccd979bdSMark Fasheh if (!dlm_debug) { 2808ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2809ccd979bdSMark Fasheh goto out; 2810ccd979bdSMark Fasheh } 2811ccd979bdSMark Fasheh 2812ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2813ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2814ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2815ccd979bdSMark Fasheh out: 2816ccd979bdSMark Fasheh return dlm_debug; 2817ccd979bdSMark Fasheh } 2818ccd979bdSMark Fasheh 2819ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2820ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2821ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2822ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2823ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2824ccd979bdSMark Fasheh }; 2825ccd979bdSMark Fasheh 2826ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2827ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2828ccd979bdSMark Fasheh { 2829ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2830ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2831ccd979bdSMark Fasheh 2832ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2833ccd979bdSMark Fasheh 2834ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2835ccd979bdSMark Fasheh /* discover the head of the list */ 2836ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2837ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2838ccd979bdSMark Fasheh break; 2839ccd979bdSMark Fasheh } 2840ccd979bdSMark Fasheh 2841ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2842ccd979bdSMark Fasheh * l_ops field. 
*/ 2843ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2844ccd979bdSMark Fasheh ret = iter; 2845ccd979bdSMark Fasheh break; 2846ccd979bdSMark Fasheh } 2847ccd979bdSMark Fasheh } 2848ccd979bdSMark Fasheh 2849ccd979bdSMark Fasheh return ret; 2850ccd979bdSMark Fasheh } 2851ccd979bdSMark Fasheh 2852ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2853ccd979bdSMark Fasheh { 2854ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2855ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2856ccd979bdSMark Fasheh 2857ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2858ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2859ccd979bdSMark Fasheh if (iter) { 2860ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2861ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2862ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2863ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2864ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2865ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2866ccd979bdSMark Fasheh * in them. 
*/ 2867ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2868ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2869ccd979bdSMark Fasheh } 2870ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2871ccd979bdSMark Fasheh 2872ccd979bdSMark Fasheh return iter; 2873ccd979bdSMark Fasheh } 2874ccd979bdSMark Fasheh 2875ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2876ccd979bdSMark Fasheh { 2877ccd979bdSMark Fasheh } 2878ccd979bdSMark Fasheh 2879ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2880ccd979bdSMark Fasheh { 2881ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2882ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2883ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2884ccd979bdSMark Fasheh 2885ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2886ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2887ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2888ccd979bdSMark Fasheh if (iter) { 2889ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2890ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2891ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2892ccd979bdSMark Fasheh } 2893ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2894ccd979bdSMark Fasheh 2895ccd979bdSMark Fasheh return iter; 2896ccd979bdSMark Fasheh } 2897ccd979bdSMark Fasheh 28985bc970e8SSunil Mushran /* 28995bc970e8SSunil Mushran * Version is used by debugfs.ocfs2 to determine the format being used 29005bc970e8SSunil Mushran * 29015bc970e8SSunil Mushran * New in version 2 29025bc970e8SSunil Mushran * - Lock stats printed 29035bc970e8SSunil Mushran * New in version 3 29045bc970e8SSunil Mushran * - Max time in lock stats is in usecs (instead of nsecs) 29055bc970e8SSunil Mushran */ 29065bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3 2907ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 
2908ccd979bdSMark Fasheh { 2909ccd979bdSMark Fasheh int i; 2910ccd979bdSMark Fasheh char *lvb; 2911ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2912ccd979bdSMark Fasheh 2913ccd979bdSMark Fasheh if (!lockres) 2914ccd979bdSMark Fasheh return -EINVAL; 2915ccd979bdSMark Fasheh 2916d680efe9SMark Fasheh seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2917d680efe9SMark Fasheh 2918d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2919d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2920d680efe9SMark Fasheh lockres->l_name, 2921d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2922d680efe9SMark Fasheh else 2923d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2924d680efe9SMark Fasheh 2925d680efe9SMark Fasheh seq_printf(m, "%d\t" 2926ccd979bdSMark Fasheh "0x%lx\t" 2927ccd979bdSMark Fasheh "0x%x\t" 2928ccd979bdSMark Fasheh "0x%x\t" 2929ccd979bdSMark Fasheh "%u\t" 2930ccd979bdSMark Fasheh "%u\t" 2931ccd979bdSMark Fasheh "%d\t" 2932ccd979bdSMark Fasheh "%d\t", 2933ccd979bdSMark Fasheh lockres->l_level, 2934ccd979bdSMark Fasheh lockres->l_flags, 2935ccd979bdSMark Fasheh lockres->l_action, 2936ccd979bdSMark Fasheh lockres->l_unlock_action, 2937ccd979bdSMark Fasheh lockres->l_ro_holders, 2938ccd979bdSMark Fasheh lockres->l_ex_holders, 2939ccd979bdSMark Fasheh lockres->l_requested, 2940ccd979bdSMark Fasheh lockres->l_blocking); 2941ccd979bdSMark Fasheh 2942ccd979bdSMark Fasheh /* Dump the raw LVB */ 29438f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2944ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2945ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2946ccd979bdSMark Fasheh 29478ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 29485bc970e8SSunil Mushran # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets) 29495bc970e8SSunil Mushran # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets) 29505bc970e8SSunil Mushran # 
define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail) 29515bc970e8SSunil Mushran # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail) 29525bc970e8SSunil Mushran # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total) 29535bc970e8SSunil Mushran # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total) 29545bc970e8SSunil Mushran # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) 29555bc970e8SSunil Mushran # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) 29565bc970e8SSunil Mushran # define lock_refresh(_l) ((_l)->l_lock_refresh) 29578ddb7b00SSunil Mushran #else 29585bc970e8SSunil Mushran # define lock_num_prmode(_l) (0) 29595bc970e8SSunil Mushran # define lock_num_exmode(_l) (0) 29608ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 29618ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 2962dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 2963dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 29648ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 29658ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 29668ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 29678ddb7b00SSunil Mushran #endif 29688ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 29695bc970e8SSunil Mushran seq_printf(m, "%u\t" 29705bc970e8SSunil Mushran "%u\t" 29718ddb7b00SSunil Mushran "%u\t" 29728ddb7b00SSunil Mushran "%u\t" 29738ddb7b00SSunil Mushran "%llu\t" 29748ddb7b00SSunil Mushran "%llu\t" 29758ddb7b00SSunil Mushran "%u\t" 29768ddb7b00SSunil Mushran "%u\t" 29778ddb7b00SSunil Mushran "%u\t", 29788ddb7b00SSunil Mushran lock_num_prmode(lockres), 29798ddb7b00SSunil Mushran lock_num_exmode(lockres), 29808ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 29818ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 29828ddb7b00SSunil Mushran lock_total_prmode(lockres), 29838ddb7b00SSunil Mushran lock_total_exmode(lockres), 29848ddb7b00SSunil Mushran 
lock_max_prmode(lockres), 29858ddb7b00SSunil Mushran lock_max_exmode(lockres), 29868ddb7b00SSunil Mushran lock_refresh(lockres)); 29878ddb7b00SSunil Mushran 2988ccd979bdSMark Fasheh /* End the line */ 2989ccd979bdSMark Fasheh seq_printf(m, "\n"); 2990ccd979bdSMark Fasheh return 0; 2991ccd979bdSMark Fasheh } 2992ccd979bdSMark Fasheh 299390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2994ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2995ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2996ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2997ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2998ccd979bdSMark Fasheh }; 2999ccd979bdSMark Fasheh 3000ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 3001ccd979bdSMark Fasheh { 300233fa1d90SJoe Perches struct seq_file *seq = file->private_data; 3003ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 3004ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 3005ccd979bdSMark Fasheh 3006ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 3007ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 3008ccd979bdSMark Fasheh return seq_release_private(inode, file); 3009ccd979bdSMark Fasheh } 3010ccd979bdSMark Fasheh 3011ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 3012ccd979bdSMark Fasheh { 3013ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 3014ccd979bdSMark Fasheh struct ocfs2_super *osb; 3015ccd979bdSMark Fasheh 30161848cb55SRob Jones priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); 3017ccd979bdSMark Fasheh if (!priv) { 30181848cb55SRob Jones mlog_errno(-ENOMEM); 30191848cb55SRob Jones return -ENOMEM; 3020ccd979bdSMark Fasheh } 30211848cb55SRob Jones 30228e18e294STheodore Ts'o osb = inode->i_private; 3023ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 3024ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 
3025ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 3026ccd979bdSMark Fasheh 3027ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 3028ccd979bdSMark Fasheh priv->p_dlm_debug); 3029ccd979bdSMark Fasheh 30301848cb55SRob Jones return 0; 3031ccd979bdSMark Fasheh } 3032ccd979bdSMark Fasheh 30334b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 3034ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 3035ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 3036ccd979bdSMark Fasheh .read = seq_read, 3037ccd979bdSMark Fasheh .llseek = seq_lseek, 3038ccd979bdSMark Fasheh }; 3039ccd979bdSMark Fasheh 3040ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 3041ccd979bdSMark Fasheh { 3042ccd979bdSMark Fasheh int ret = 0; 3043ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3044ccd979bdSMark Fasheh 3045ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 3046ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 3047ccd979bdSMark Fasheh osb->osb_debug_root, 3048ccd979bdSMark Fasheh osb, 3049ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 30508f443e23SLinus Torvalds if (!dlm_debug->d_locking_state) { 3051ccd979bdSMark Fasheh ret = -EINVAL; 3052ccd979bdSMark Fasheh mlog(ML_ERROR, 3053ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 3054ccd979bdSMark Fasheh goto out; 3055ccd979bdSMark Fasheh } 3056ccd979bdSMark Fasheh 3057ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 3058ccd979bdSMark Fasheh out: 3059ccd979bdSMark Fasheh return ret; 3060ccd979bdSMark Fasheh } 3061ccd979bdSMark Fasheh 3062ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 3063ccd979bdSMark Fasheh { 3064ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3065ccd979bdSMark Fasheh 3066ccd979bdSMark Fasheh if (dlm_debug) { 3067ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 
3068ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 3069ccd979bdSMark Fasheh } 3070ccd979bdSMark Fasheh } 3071ccd979bdSMark Fasheh 3072ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 3073ccd979bdSMark Fasheh { 3074c271c5c2SSunil Mushran int status = 0; 30754670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 3076ccd979bdSMark Fasheh 30770abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 30780abd6d18SMark Fasheh osb->node_num = 0; 3079c271c5c2SSunil Mushran goto local; 30800abd6d18SMark Fasheh } 3081c271c5c2SSunil Mushran 3082ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 3083ccd979bdSMark Fasheh if (status < 0) { 3084ccd979bdSMark Fasheh mlog_errno(status); 3085ccd979bdSMark Fasheh goto bail; 3086ccd979bdSMark Fasheh } 3087ccd979bdSMark Fasheh 308834d024f8SMark Fasheh /* launch downconvert thread */ 30895afc44e2SJoseph Qi osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s", 30905afc44e2SJoseph Qi osb->uuid_str); 309134d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 309234d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 309334d024f8SMark Fasheh osb->dc_task = NULL; 3094ccd979bdSMark Fasheh mlog_errno(status); 3095ccd979bdSMark Fasheh goto bail; 3096ccd979bdSMark Fasheh } 3097ccd979bdSMark Fasheh 3098ccd979bdSMark Fasheh /* for now, uuid == domain */ 30999c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 3100c74a3bddSGoldwyn Rodrigues osb->osb_cluster_name, 3101c74a3bddSGoldwyn Rodrigues strlen(osb->osb_cluster_name), 31029c6c877cSJoel Becker osb->uuid_str, 31034670c46dSJoel Becker strlen(osb->uuid_str), 3104553b5eb9SJoel Becker &lproto, ocfs2_do_node_down, osb, 31054670c46dSJoel Becker &conn); 31064670c46dSJoel Becker if (status) { 3107ccd979bdSMark Fasheh mlog_errno(status); 3108ccd979bdSMark Fasheh goto bail; 3109ccd979bdSMark Fasheh } 3110ccd979bdSMark Fasheh 31113e834151SGoldwyn Rodrigues status = ocfs2_cluster_this_node(conn, &osb->node_num); 31120abd6d18SMark Fasheh if 
(status < 0) { 31130abd6d18SMark Fasheh mlog_errno(status); 31140abd6d18SMark Fasheh mlog(ML_ERROR, 31150abd6d18SMark Fasheh "could not find this host's node number\n"); 3116286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 31170abd6d18SMark Fasheh goto bail; 31180abd6d18SMark Fasheh } 31190abd6d18SMark Fasheh 3120c271c5c2SSunil Mushran local: 3121ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 3122ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 31236ca497a8Swengang wang ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 312483273932SSrinivas Eeda ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 3125ccd979bdSMark Fasheh 31264670c46dSJoel Becker osb->cconn = conn; 3127ccd979bdSMark Fasheh bail: 3128ccd979bdSMark Fasheh if (status < 0) { 3129ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 313034d024f8SMark Fasheh if (osb->dc_task) 313134d024f8SMark Fasheh kthread_stop(osb->dc_task); 3132ccd979bdSMark Fasheh } 3133ccd979bdSMark Fasheh 3134ccd979bdSMark Fasheh return status; 3135ccd979bdSMark Fasheh } 3136ccd979bdSMark Fasheh 3137286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3138286eaa95SJoel Becker int hangup_pending) 3139ccd979bdSMark Fasheh { 3140ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 3141ccd979bdSMark Fasheh 31424670c46dSJoel Becker /* 31434670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 31444670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 31454670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
31464670c46dSJoel Becker */ 31474670c46dSJoel Becker 314834d024f8SMark Fasheh if (osb->dc_task) { 314934d024f8SMark Fasheh kthread_stop(osb->dc_task); 315034d024f8SMark Fasheh osb->dc_task = NULL; 3151ccd979bdSMark Fasheh } 3152ccd979bdSMark Fasheh 3153ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 3154ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 31556ca497a8Swengang wang ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 315683273932SSrinivas Eeda ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 3157ccd979bdSMark Fasheh 3158286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 31594670c46dSJoel Becker osb->cconn = NULL; 3160ccd979bdSMark Fasheh 3161ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 3162ccd979bdSMark Fasheh } 3163ccd979bdSMark Fasheh 3164ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 31650d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 3166ccd979bdSMark Fasheh { 31677431cd7eSJoel Becker int ret; 3168ccd979bdSMark Fasheh unsigned long flags; 3169bd3e7610SJoel Becker u32 lkm_flags = 0; 3170ccd979bdSMark Fasheh 3171ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 3172ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 3173ccd979bdSMark Fasheh goto out; 3174ccd979bdSMark Fasheh 3175b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3176bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 3177b80fc012SMark Fasheh 3178ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3179ccd979bdSMark Fasheh 3180ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3181ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 3182ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3183ccd979bdSMark Fasheh 3184ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3185ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3186ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 3187ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 3188ccd979bdSMark Fasheh lockres->l_unlock_action); 3189ccd979bdSMark Fasheh 3190ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3191ccd979bdSMark Fasheh 3192ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 3193ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 3194ccd979bdSMark Fasheh * future? 
*/ 3195ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3196ccd979bdSMark Fasheh 3197ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3198ccd979bdSMark Fasheh } 3199ccd979bdSMark Fasheh 32000d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 32010d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3202bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 32030d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 32040d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 32050d5dc6c2SMark Fasheh } 3206ccd979bdSMark Fasheh 3207ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 3208ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3209ccd979bdSMark Fasheh lockres->l_name); 3210ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3211ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3212ccd979bdSMark Fasheh 3213ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3214ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3215ccd979bdSMark Fasheh goto out; 3216ccd979bdSMark Fasheh } 3217ccd979bdSMark Fasheh 3218ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3219ccd979bdSMark Fasheh 3220ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 3221ccd979bdSMark Fasheh * fire. */ 3222ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3223ccd979bdSMark Fasheh 3224ccd979bdSMark Fasheh /* is this necessary? 
*/ 3225ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3226ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3227ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3228ccd979bdSMark Fasheh 3229ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3230ccd979bdSMark Fasheh 3231a796d286SJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); 32327431cd7eSJoel Becker if (ret) { 32337431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3234ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3235cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3236ccd979bdSMark Fasheh BUG(); 3237ccd979bdSMark Fasheh } 323873ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3239ccd979bdSMark Fasheh lockres->l_name); 3240ccd979bdSMark Fasheh 3241ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3242ccd979bdSMark Fasheh out: 3243ccd979bdSMark Fasheh return 0; 3244ccd979bdSMark Fasheh } 3245ccd979bdSMark Fasheh 324684d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 324784d86f83SJan Kara struct ocfs2_lock_res *lockres); 324884d86f83SJan Kara 3249ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 3250ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 325134d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 3252ccd979bdSMark Fasheh * it safe to drop. 3253ccd979bdSMark Fasheh * 3254ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 325584d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, 325684d86f83SJan Kara struct ocfs2_lock_res *lockres) 3257ccd979bdSMark Fasheh { 3258ccd979bdSMark Fasheh int status; 3259ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 326084d86f83SJan Kara unsigned long flags, flags2; 3261ccd979bdSMark Fasheh 3262ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 3263ccd979bdSMark Fasheh 3264ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3265ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 326684d86f83SJan Kara if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) { 326784d86f83SJan Kara /* 326884d86f83SJan Kara * We know the downconvert is queued but not in progress 326984d86f83SJan Kara * because we are the downconvert thread and processing 327084d86f83SJan Kara * different lock. So we can just remove the lock from the 327184d86f83SJan Kara * queue. This is not only an optimization but also a way 327284d86f83SJan Kara * to avoid the following deadlock: 327384d86f83SJan Kara * ocfs2_dentry_post_unlock() 327484d86f83SJan Kara * ocfs2_dentry_lock_put() 327584d86f83SJan Kara * ocfs2_drop_dentry_lock() 327684d86f83SJan Kara * iput() 327784d86f83SJan Kara * ocfs2_evict_inode() 327884d86f83SJan Kara * ocfs2_clear_inode() 327984d86f83SJan Kara * ocfs2_mark_lockres_freeing() 328084d86f83SJan Kara * ... blocks waiting for OCFS2_LOCK_QUEUED 328184d86f83SJan Kara * since we are the downconvert thread which 328284d86f83SJan Kara * should clear the flag. 328384d86f83SJan Kara */ 328484d86f83SJan Kara spin_unlock_irqrestore(&lockres->l_lock, flags); 328584d86f83SJan Kara spin_lock_irqsave(&osb->dc_task_lock, flags2); 328684d86f83SJan Kara list_del_init(&lockres->l_blocked_list); 328784d86f83SJan Kara osb->blocked_lock_count--; 328884d86f83SJan Kara spin_unlock_irqrestore(&osb->dc_task_lock, flags2); 328984d86f83SJan Kara /* 329084d86f83SJan Kara * Warn if we recurse into another post_unlock call. 
Strictly 329184d86f83SJan Kara * speaking it isn't a problem but we need to be careful if 329284d86f83SJan Kara * that happens (stack overflow, deadlocks, ...) so warn if 329384d86f83SJan Kara * ocfs2 grows a path for which this can happen. 329484d86f83SJan Kara */ 329584d86f83SJan Kara WARN_ON_ONCE(lockres->l_ops->post_unlock); 329684d86f83SJan Kara /* Since the lock is freeing we don't do much in the fn below */ 329784d86f83SJan Kara ocfs2_process_blocked_lock(osb, lockres); 329884d86f83SJan Kara return; 329984d86f83SJan Kara } 3300ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3301ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3302ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3303ccd979bdSMark Fasheh 3304ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3305ccd979bdSMark Fasheh 3306ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 3307ccd979bdSMark Fasheh if (status) 3308ccd979bdSMark Fasheh mlog_errno(status); 3309ccd979bdSMark Fasheh 3310ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3311ccd979bdSMark Fasheh } 3312ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3313ccd979bdSMark Fasheh } 3314ccd979bdSMark Fasheh 3315d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 3316d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3317d680efe9SMark Fasheh { 3318d680efe9SMark Fasheh int ret; 3319d680efe9SMark Fasheh 332084d86f83SJan Kara ocfs2_mark_lockres_freeing(osb, lockres); 33210d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 3322d680efe9SMark Fasheh if (ret) 3323d680efe9SMark Fasheh mlog_errno(ret); 3324d680efe9SMark Fasheh } 3325d680efe9SMark Fasheh 3326ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3327ccd979bdSMark Fasheh { 3328d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3329d680efe9SMark Fasheh 
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 33306ca497a8Swengang wang ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 333183273932SSrinivas Eeda ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3332ccd979bdSMark Fasheh } 3333ccd979bdSMark Fasheh 3334ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3335ccd979bdSMark Fasheh { 3336ccd979bdSMark Fasheh int status, err; 3337ccd979bdSMark Fasheh 3338ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3339ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3340ccd979bdSMark Fasheh 3341ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 334250008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3343ccd979bdSMark Fasheh if (err < 0) 3344ccd979bdSMark Fasheh mlog_errno(err); 3345ccd979bdSMark Fasheh 3346ccd979bdSMark Fasheh status = err; 3347ccd979bdSMark Fasheh 3348ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3349e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3350ccd979bdSMark Fasheh if (err < 0) 3351ccd979bdSMark Fasheh mlog_errno(err); 3352ccd979bdSMark Fasheh if (err < 0 && !status) 3353ccd979bdSMark Fasheh status = err; 3354ccd979bdSMark Fasheh 3355ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 33560d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3357ccd979bdSMark Fasheh if (err < 0) 3358ccd979bdSMark Fasheh mlog_errno(err); 3359ccd979bdSMark Fasheh if (err < 0 && !status) 3360ccd979bdSMark Fasheh status = err; 3361ccd979bdSMark Fasheh 3362ccd979bdSMark Fasheh return status; 3363ccd979bdSMark Fasheh } 3364ccd979bdSMark Fasheh 3365de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3366ccd979bdSMark Fasheh int new_level) 3367ccd979bdSMark Fasheh { 3368ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3369ccd979bdSMark Fasheh 3370bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 
3371ccd979bdSMark Fasheh 3372ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 33739b915181SSunil Mushran mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " 33749b915181SSunil Mushran "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " 33759b915181SSunil Mushran "block %d, pgen %d\n", lockres->l_name, lockres->l_level, 33769b915181SSunil Mushran new_level, list_empty(&lockres->l_blocked_list), 33779b915181SSunil Mushran list_empty(&lockres->l_mask_waiters), lockres->l_type, 33789b915181SSunil Mushran lockres->l_flags, lockres->l_ro_holders, 33799b915181SSunil Mushran lockres->l_ex_holders, lockres->l_action, 33809b915181SSunil Mushran lockres->l_unlock_action, lockres->l_requested, 33819b915181SSunil Mushran lockres->l_blocking, lockres->l_pending_gen); 3382ccd979bdSMark Fasheh BUG(); 3383ccd979bdSMark Fasheh } 3384ccd979bdSMark Fasheh 33859b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", 33869b915181SSunil Mushran lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); 3387ccd979bdSMark Fasheh 3388ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3389ccd979bdSMark Fasheh lockres->l_requested = new_level; 3390ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3391de551246SJoel Becker return lockres_set_pending(lockres); 3392ccd979bdSMark Fasheh } 3393ccd979bdSMark Fasheh 3394ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3395ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3396ccd979bdSMark Fasheh int new_level, 3397de551246SJoel Becker int lvb, 3398de551246SJoel Becker unsigned int generation) 3399ccd979bdSMark Fasheh { 3400bd3e7610SJoel Becker int ret; 3401bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3402ccd979bdSMark Fasheh 34039b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 34049b915181SSunil Mushran lockres->l_level, new_level); 34059b915181SSunil Mushran 3406e7ee2c08SEric Ren /* 
3407e7ee2c08SEric Ren * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always 3408e7ee2c08SEric Ren * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that 3409e7ee2c08SEric Ren * we can recover correctly from node failure. Otherwise, we may get 3410e7ee2c08SEric Ren * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. 3411e7ee2c08SEric Ren */ 3412e7ee2c08SEric Ren if (!ocfs2_is_o2cb_active() && 3413e7ee2c08SEric Ren lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3414e7ee2c08SEric Ren lvb = 1; 3415e7ee2c08SEric Ren 3416ccd979bdSMark Fasheh if (lvb) 3417bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3418ccd979bdSMark Fasheh 34194670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3420ccd979bdSMark Fasheh new_level, 3421ccd979bdSMark Fasheh &lockres->l_lksb, 3422ccd979bdSMark Fasheh dlm_flags, 3423ccd979bdSMark Fasheh lockres->l_name, 3424a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 3425de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 34267431cd7eSJoel Becker if (ret) { 34277431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3428ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3429ccd979bdSMark Fasheh goto bail; 3430ccd979bdSMark Fasheh } 3431ccd979bdSMark Fasheh 3432ccd979bdSMark Fasheh ret = 0; 3433ccd979bdSMark Fasheh bail: 3434ccd979bdSMark Fasheh return ret; 3435ccd979bdSMark Fasheh } 3436ccd979bdSMark Fasheh 343724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3438ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3439ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3440ccd979bdSMark Fasheh { 3441ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3442ccd979bdSMark Fasheh 3443ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3444ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3445ccd979bdSMark Fasheh * then just drop the 
spinlock and allow the caller to 3446ccd979bdSMark Fasheh * requeue this lock. */ 34479b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); 3448ccd979bdSMark Fasheh return 0; 3449ccd979bdSMark Fasheh } 3450ccd979bdSMark Fasheh 3451ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 3452ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3453ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3454ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3455ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. */ 3456ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3457ccd979bdSMark Fasheh 3458ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3459ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3460ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3461ccd979bdSMark Fasheh 34629b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 34639b915181SSunil Mushran 3464ccd979bdSMark Fasheh return 1; 3465ccd979bdSMark Fasheh } 3466ccd979bdSMark Fasheh 3467ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3468ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3469ccd979bdSMark Fasheh { 3470ccd979bdSMark Fasheh int ret; 3471ccd979bdSMark Fasheh 34724670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3473a796d286SJoel Becker DLM_LKF_CANCEL); 34747431cd7eSJoel Becker if (ret) { 34757431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3476ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3477ccd979bdSMark Fasheh } 3478ccd979bdSMark Fasheh 34799b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3480ccd979bdSMark Fasheh 3481ccd979bdSMark Fasheh return ret; 3482ccd979bdSMark Fasheh } 3483ccd979bdSMark Fasheh 3484b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super 
*osb, 3485ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3486cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3487ccd979bdSMark Fasheh { 3488ccd979bdSMark Fasheh unsigned long flags; 3489ccd979bdSMark Fasheh int blocking; 3490ccd979bdSMark Fasheh int new_level; 3491079b8057SSunil Mushran int level; 3492ccd979bdSMark Fasheh int ret = 0; 34935ef0d4eaSMark Fasheh int set_lvb = 0; 3494de551246SJoel Becker unsigned int gen; 3495ccd979bdSMark Fasheh 3496ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3497ccd979bdSMark Fasheh 3498ccd979bdSMark Fasheh recheck: 3499db0f6ce6SSunil Mushran /* 3500db0f6ce6SSunil Mushran * Is it still blocking? If not, we have no more work to do. 3501db0f6ce6SSunil Mushran */ 3502db0f6ce6SSunil Mushran if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3503db0f6ce6SSunil Mushran BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3504db0f6ce6SSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 3505db0f6ce6SSunil Mushran ret = 0; 3506db0f6ce6SSunil Mushran goto leave; 3507db0f6ce6SSunil Mushran } 3508db0f6ce6SSunil Mushran 3509ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3510de551246SJoel Becker /* XXX 3511de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3512de551246SJoel Becker * exists entirely for one reason - another thread has set 3513de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3514de551246SJoel Becker * 3515de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3516de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3517de551246SJoel Becker * get no ast, and we will have no way of knowing the 3518de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3519de551246SJoel Becker * into dlm_lock() and wait...forever. 3520de551246SJoel Becker * 3521de551246SJoel Becker * Why forever? 
Because another node has asked for the 3522de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3523de551246SJoel Becker * 3524de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3525de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3526de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3527de551246SJoel Becker * we then cancel their request. 3528de551246SJoel Becker * 3529de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3530de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3531de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3532de551246SJoel Becker */ 35339b915181SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_PENDING) { 35349b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", 35359b915181SSunil Mushran lockres->l_name); 3536de551246SJoel Becker goto leave_requeue; 35379b915181SSunil Mushran } 3538de551246SJoel Becker 3539d680efe9SMark Fasheh ctl->requeue = 1; 3540ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3541ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3542ccd979bdSMark Fasheh if (ret) { 3543ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3544ccd979bdSMark Fasheh if (ret < 0) 3545ccd979bdSMark Fasheh mlog_errno(ret); 3546ccd979bdSMark Fasheh } 3547ccd979bdSMark Fasheh goto leave; 3548ccd979bdSMark Fasheh } 3549ccd979bdSMark Fasheh 3550a1912826SSunil Mushran /* 3551a1912826SSunil Mushran * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3552a1912826SSunil Mushran * set when the ast is received for an upconvert just before the 3553a1912826SSunil Mushran * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3554a1912826SSunil Mushran * on the heels of the ast, we want to delay the downconvert just 3555a1912826SSunil Mushran * enough to allow the up requestor to do its task. 
Because this 3556a1912826SSunil Mushran * lock is in the blocked queue, the lock will be downconverted 3557a1912826SSunil Mushran * as soon as the requestor is done with the lock. 3558a1912826SSunil Mushran */ 3559a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3560a1912826SSunil Mushran goto leave_requeue; 3561a1912826SSunil Mushran 35620d74125aSSunil Mushran /* 35630d74125aSSunil Mushran * How can we block and yet be at NL? We were trying to upconvert 35640d74125aSSunil Mushran * from NL and got canceled. The code comes back here, and now 35650d74125aSSunil Mushran * we notice and clear BLOCKING. 35660d74125aSSunil Mushran */ 35670d74125aSSunil Mushran if (lockres->l_level == DLM_LOCK_NL) { 35680d74125aSSunil Mushran BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 35699b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); 35700d74125aSSunil Mushran lockres->l_blocking = DLM_LOCK_NL; 35710d74125aSSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 35720d74125aSSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 35730d74125aSSunil Mushran goto leave; 35740d74125aSSunil Mushran } 35750d74125aSSunil Mushran 3576ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3577ccd979bdSMark Fasheh * then requeue. 
*/ 3578bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 35799b915181SSunil Mushran && (lockres->l_ex_holders || lockres->l_ro_holders)) { 35809b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", 35819b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders, 35829b915181SSunil Mushran lockres->l_ro_holders); 3583f7fbfdd1SMark Fasheh goto leave_requeue; 35849b915181SSunil Mushran } 3585ccd979bdSMark Fasheh 3586ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3587ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3588bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 35899b915181SSunil Mushran lockres->l_ex_holders) { 35909b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", 35919b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders); 3592f7fbfdd1SMark Fasheh goto leave_requeue; 35939b915181SSunil Mushran } 3594f7fbfdd1SMark Fasheh 3595f7fbfdd1SMark Fasheh /* 3596f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3597f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3598f7fbfdd1SMark Fasheh */ 3599f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 36009b915181SSunil Mushran && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { 36019b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", 36029b915181SSunil Mushran lockres->l_name); 3603f7fbfdd1SMark Fasheh goto leave_requeue; 36049b915181SSunil Mushran } 3605ccd979bdSMark Fasheh 360616d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 360716d5b956SMark Fasheh 360816d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 36099b915181SSunil Mushran && !lockres->l_ops->check_downconvert(lockres, new_level)) { 36109b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", 36119b915181SSunil Mushran lockres->l_name); 361216d5b956SMark Fasheh goto leave_requeue; 36139b915181SSunil Mushran } 361416d5b956SMark Fasheh 3615ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3616ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3617ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3618cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3619ccd979bdSMark Fasheh goto downconvert; 3620ccd979bdSMark Fasheh 3621ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3622ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3623ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3624ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. 
*/ 3625ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3626079b8057SSunil Mushran level = lockres->l_level; 3627ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3628ccd979bdSMark Fasheh 3629cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3630d680efe9SMark Fasheh 36319b915181SSunil Mushran if (ctl->unblock_action == UNBLOCK_STOP_POST) { 36329b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", 36339b915181SSunil Mushran lockres->l_name); 3634d680efe9SMark Fasheh goto leave; 36359b915181SSunil Mushran } 3636ccd979bdSMark Fasheh 3637ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3638079b8057SSunil Mushran if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3639ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3640ccd979bdSMark Fasheh * it just yet. */ 36419b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " 36429b915181SSunil Mushran "Recheck\n", lockres->l_name, blocking, 36439b915181SSunil Mushran lockres->l_blocking, level, lockres->l_level); 3644ccd979bdSMark Fasheh goto recheck; 3645ccd979bdSMark Fasheh } 3646ccd979bdSMark Fasheh 3647ccd979bdSMark Fasheh downconvert: 3648d680efe9SMark Fasheh ctl->requeue = 0; 3649ccd979bdSMark Fasheh 36505ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3651bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 36525ef0d4eaSMark Fasheh set_lvb = 1; 36535ef0d4eaSMark Fasheh 36545ef0d4eaSMark Fasheh /* 36555ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 36565ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 36575ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 36585ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 
36595ef0d4eaSMark Fasheh */ 36605ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 36615ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 36625ef0d4eaSMark Fasheh } 36635ef0d4eaSMark Fasheh 3664de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3665ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3666de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3667de551246SJoel Becker gen); 3668de551246SJoel Becker 3669ccd979bdSMark Fasheh leave: 3670c1e8d35eSTao Ma if (ret) 3671c1e8d35eSTao Ma mlog_errno(ret); 3672ccd979bdSMark Fasheh return ret; 3673f7fbfdd1SMark Fasheh 3674f7fbfdd1SMark Fasheh leave_requeue: 3675f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3676f7fbfdd1SMark Fasheh ctl->requeue = 1; 3677f7fbfdd1SMark Fasheh 3678f7fbfdd1SMark Fasheh return 0; 3679ccd979bdSMark Fasheh } 3680ccd979bdSMark Fasheh 3681d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3682ccd979bdSMark Fasheh int blocking) 3683ccd979bdSMark Fasheh { 3684ccd979bdSMark Fasheh struct inode *inode; 3685ccd979bdSMark Fasheh struct address_space *mapping; 36865e98d492SGoldwyn Rodrigues struct ocfs2_inode_info *oi; 3687ccd979bdSMark Fasheh 3688ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 3689ccd979bdSMark Fasheh mapping = inode->i_mapping; 3690ccd979bdSMark Fasheh 36915e98d492SGoldwyn Rodrigues if (S_ISDIR(inode->i_mode)) { 36925e98d492SGoldwyn Rodrigues oi = OCFS2_I(inode); 36935e98d492SGoldwyn Rodrigues oi->ip_dir_lock_gen++; 36945e98d492SGoldwyn Rodrigues mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); 36955e98d492SGoldwyn Rodrigues goto out; 36965e98d492SGoldwyn Rodrigues } 36975e98d492SGoldwyn Rodrigues 36981044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3699f1f54068SMark Fasheh goto out; 3700f1f54068SMark Fasheh 37017f4a2a97SMark Fasheh /* 37027f4a2a97SMark Fasheh * We need this before the 
filemap_fdatawrite() so that it can 37037f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 37047f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 37057f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 37067f4a2a97SMark Fasheh * them up again. 37077f4a2a97SMark Fasheh */ 37087f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 37097f4a2a97SMark Fasheh 3710ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3711b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3712b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3713ccd979bdSMark Fasheh } 3714ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3715bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3716ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3717ccd979bdSMark Fasheh } else { 3718ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3719ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3720ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3721ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3722ccd979bdSMark Fasheh * them around in that case. 
*/ 3723ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3724ccd979bdSMark Fasheh } 3725ccd979bdSMark Fasheh 3726b8a7a3a6SAndreas Gruenbacher forget_all_cached_acls(inode); 3727b8a7a3a6SAndreas Gruenbacher 3728f1f54068SMark Fasheh out: 3729d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3730ccd979bdSMark Fasheh } 3731ccd979bdSMark Fasheh 3732a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3733a4338481STao Ma struct ocfs2_lock_res *lockres, 3734810d5aebSMark Fasheh int new_level) 3735810d5aebSMark Fasheh { 3736a4338481STao Ma int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3737810d5aebSMark Fasheh 3738bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3739bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3740810d5aebSMark Fasheh 3741810d5aebSMark Fasheh if (checkpointed) 3742810d5aebSMark Fasheh return 1; 3743810d5aebSMark Fasheh 3744a4338481STao Ma ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3745810d5aebSMark Fasheh return 0; 3746810d5aebSMark Fasheh } 3747810d5aebSMark Fasheh 3748a4338481STao Ma static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3749a4338481STao Ma int new_level) 3750a4338481STao Ma { 3751a4338481STao Ma struct inode *inode = ocfs2_lock_res_inode(lockres); 3752a4338481STao Ma 3753a4338481STao Ma return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); 3754a4338481STao Ma } 3755a4338481STao Ma 3756810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3757810d5aebSMark Fasheh { 3758810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3759810d5aebSMark Fasheh 3760810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3761810d5aebSMark Fasheh } 3762810d5aebSMark Fasheh 3763d680efe9SMark Fasheh /* 3764d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. 
Right now this 376534d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3766d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 3767d680efe9SMark Fasheh */ 3768d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3769d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3770d680efe9SMark Fasheh { 3771d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3772d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3773d680efe9SMark Fasheh } 3774d680efe9SMark Fasheh 3775d680efe9SMark Fasheh /* 3776d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3777d680efe9SMark Fasheh * 3778d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3779d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3780d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3781d680efe9SMark Fasheh * 3782d680efe9SMark Fasheh * We have two potential problems 3783d680efe9SMark Fasheh * 3784d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3785d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3786d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3787d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3788d680efe9SMark Fasheh * unblock processing. 3789d680efe9SMark Fasheh * 3790d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3791d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3792d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3793d680efe9SMark Fasheh */ 3794d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3795d680efe9SMark Fasheh int blocking) 3796d680efe9SMark Fasheh { 3797d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3798d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3799d680efe9SMark Fasheh struct dentry *dentry; 3800d680efe9SMark Fasheh unsigned long flags; 3801d680efe9SMark Fasheh int extra_ref = 0; 3802d680efe9SMark Fasheh 3803d680efe9SMark Fasheh /* 3804d680efe9SMark Fasheh * This node is blocking another node from getting a read 3805d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3806d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3807d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3808d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3809d680efe9SMark Fasheh * so there's no further work to do. 3810d680efe9SMark Fasheh */ 3811bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3812d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3813d680efe9SMark Fasheh 3814d680efe9SMark Fasheh /* 3815d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3816d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3817d680efe9SMark Fasheh * needs to be freed or not. 3818d680efe9SMark Fasheh */ 3819d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3820d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3821d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3822d680efe9SMark Fasheh 3823d680efe9SMark Fasheh /* 3824d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3825d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3826d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3827d680efe9SMark Fasheh * flag. 
3828d680efe9SMark Fasheh */ 3829d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3830d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3831d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3832d680efe9SMark Fasheh && dl->dl_count) { 3833d680efe9SMark Fasheh dl->dl_count++; 3834d680efe9SMark Fasheh extra_ref = 1; 3835d680efe9SMark Fasheh } 3836d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3837d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3838d680efe9SMark Fasheh 3839d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3840d680efe9SMark Fasheh 3841d680efe9SMark Fasheh /* 3842d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3843d680efe9SMark Fasheh * which means we can't have any more outstanding 3844d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3845d680efe9SMark Fasheh */ 3846d680efe9SMark Fasheh if (!extra_ref) 3847d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3848d680efe9SMark Fasheh 3849d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3850d680efe9SMark Fasheh while (1) { 3851d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3852d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3853d680efe9SMark Fasheh if (!dentry) 3854d680efe9SMark Fasheh break; 3855d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3856d680efe9SMark Fasheh 385710ab8811Salex chen if (S_ISDIR(dl->dl_inode->i_mode)) 385810ab8811Salex chen shrink_dcache_parent(dentry); 385910ab8811Salex chen 3860a455589fSAl Viro mlog(0, "d_delete(%pd);\n", dentry); 3861d680efe9SMark Fasheh 3862d680efe9SMark Fasheh /* 3863d680efe9SMark Fasheh * The following dcache calls may do an 3864d680efe9SMark Fasheh * iput(). 
Normally we don't want that from the 3865d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3866d680efe9SMark Fasheh * because the requesting node already has an 3867d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3868d680efe9SMark Fasheh * for a downconvert. 3869d680efe9SMark Fasheh */ 3870d680efe9SMark Fasheh d_delete(dentry); 3871d680efe9SMark Fasheh dput(dentry); 3872d680efe9SMark Fasheh 3873d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3874d680efe9SMark Fasheh } 3875d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3876d680efe9SMark Fasheh 3877d680efe9SMark Fasheh /* 3878d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3879d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3880d680efe9SMark Fasheh */ 3881d680efe9SMark Fasheh if (dl->dl_count == 1) 3882d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3883d680efe9SMark Fasheh 3884d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3885d680efe9SMark Fasheh } 3886d680efe9SMark Fasheh 38878dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 38888dec98edSTao Ma int new_level) 38898dec98edSTao Ma { 38908dec98edSTao Ma struct ocfs2_refcount_tree *tree = 38918dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 38928dec98edSTao Ma 38938dec98edSTao Ma return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 38948dec98edSTao Ma } 38958dec98edSTao Ma 38968dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 38978dec98edSTao Ma int blocking) 38988dec98edSTao Ma { 38998dec98edSTao Ma struct ocfs2_refcount_tree *tree = 39008dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 39018dec98edSTao Ma 39028dec98edSTao Ma ocfs2_metadata_cache_purge(&tree->rf_ci); 39038dec98edSTao Ma 39048dec98edSTao Ma return UNBLOCK_CONTINUE; 39058dec98edSTao Ma } 39068dec98edSTao Ma 39079e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct 
ocfs2_lock_res *lockres) 39089e33d69fSJan Kara { 39099e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 39109e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 39119e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 39129e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 39139e33d69fSJan Kara 3914a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 39159e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 39169e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 39179e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 39189e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 39199e33d69fSJan Kara lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 39209e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 39219e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 39229e33d69fSJan Kara } 39239e33d69fSJan Kara 39249e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 39259e33d69fSJan Kara { 39269e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 39279e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 39289e33d69fSJan Kara int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 39299e33d69fSJan Kara 39309e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 39319e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 39329e33d69fSJan Kara } 39339e33d69fSJan Kara 39349e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 39359e33d69fSJan Kara { 39369e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 39379e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 39389e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 39399e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 394085eb8b73SJoel Becker struct buffer_head *bh = NULL; 39419e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 39429e33d69fSJan Kara int status = 0; 39439e33d69fSJan Kara 39441c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 39451c520dfbSJoel Becker lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 39469e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 39479e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 39489e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 39499e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 39509e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 39519e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 39529e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 39539e33d69fSJan Kara } else { 3954ae4f6ef1SJan Kara status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, 3955ae4f6ef1SJan Kara oinfo->dqi_giblk, &bh); 395685eb8b73SJoel Becker if (status) { 39579e33d69fSJan Kara mlog_errno(status); 39589e33d69fSJan Kara goto bail; 39599e33d69fSJan Kara } 39609e33d69fSJan Kara gdinfo = (struct ocfs2_global_disk_dqinfo *) 39619e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 39629e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 39639e33d69fSJan Kara info->dqi_igrace = 
le32_to_cpu(gdinfo->dqi_igrace); 39649e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 39659e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 39669e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 39679e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 39689e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 39699e33d69fSJan Kara brelse(bh); 39709e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 39719e33d69fSJan Kara } 39729e33d69fSJan Kara 39739e33d69fSJan Kara bail: 39749e33d69fSJan Kara return status; 39759e33d69fSJan Kara } 39769e33d69fSJan Kara 39779e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 39789e33d69fSJan Kara * so that we can safely refresh quota info from disk. */ 39799e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 39809e33d69fSJan Kara { 39819e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 39829e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 39839e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 39849e33d69fSJan Kara int status = 0; 39859e33d69fSJan Kara 39869e33d69fSJan Kara /* On RO devices, locking really isn't needed... 
*/ 39879e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 39889e33d69fSJan Kara if (ex) 39899e33d69fSJan Kara status = -EROFS; 39909e33d69fSJan Kara goto bail; 39919e33d69fSJan Kara } 39929e33d69fSJan Kara if (ocfs2_mount_local(osb)) 39939e33d69fSJan Kara goto bail; 39949e33d69fSJan Kara 39959e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 39969e33d69fSJan Kara if (status < 0) { 39979e33d69fSJan Kara mlog_errno(status); 39989e33d69fSJan Kara goto bail; 39999e33d69fSJan Kara } 40009e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 40019e33d69fSJan Kara goto bail; 40029e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 40039e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 40049e33d69fSJan Kara if (status) 40059e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 40069e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 40079e33d69fSJan Kara bail: 40089e33d69fSJan Kara return status; 40099e33d69fSJan Kara } 40109e33d69fSJan Kara 40118dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 40128dec98edSTao Ma { 40138dec98edSTao Ma int status; 40148dec98edSTao Ma int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 40158dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 40168dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 40178dec98edSTao Ma 40188dec98edSTao Ma 40198dec98edSTao Ma if (ocfs2_is_hard_readonly(osb)) 40208dec98edSTao Ma return -EROFS; 40218dec98edSTao Ma 40228dec98edSTao Ma if (ocfs2_mount_local(osb)) 40238dec98edSTao Ma return 0; 40248dec98edSTao Ma 40258dec98edSTao Ma status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 40268dec98edSTao Ma if (status < 0) 40278dec98edSTao Ma mlog_errno(status); 40288dec98edSTao Ma 40298dec98edSTao Ma return status; 40308dec98edSTao Ma } 40318dec98edSTao Ma 40328dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 40338dec98edSTao Ma { 40348dec98edSTao Ma int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 40358dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 40368dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 40378dec98edSTao Ma 40388dec98edSTao Ma if (!ocfs2_mount_local(osb)) 40398dec98edSTao Ma ocfs2_cluster_unlock(osb, lockres, level); 40408dec98edSTao Ma } 40418dec98edSTao Ma 404200600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 4043ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 4044ccd979bdSMark Fasheh { 4045ccd979bdSMark Fasheh int status; 4046d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 4047ccd979bdSMark Fasheh unsigned long flags; 4048ccd979bdSMark Fasheh 4049ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 4050ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 4051ccd979bdSMark Fasheh * flag. 
*/ 4052ccd979bdSMark Fasheh 4053ccd979bdSMark Fasheh BUG_ON(!lockres); 4054ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 4055ccd979bdSMark Fasheh 40569b915181SSunil Mushran mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); 4057ccd979bdSMark Fasheh 4058ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 405934d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 4060ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 4061ccd979bdSMark Fasheh * but short circuiting here will still save us some 4062ccd979bdSMark Fasheh * performance. */ 4063ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 4064ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 4065ccd979bdSMark Fasheh goto unqueue; 4066ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 4067ccd979bdSMark Fasheh 4068b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 4069ccd979bdSMark Fasheh if (status < 0) 4070ccd979bdSMark Fasheh mlog_errno(status); 4071ccd979bdSMark Fasheh 4072ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 4073ccd979bdSMark Fasheh unqueue: 4074d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 4075ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 4076ccd979bdSMark Fasheh } else 4077ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 4078ccd979bdSMark Fasheh 40799b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, 4080d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 4081ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 4082ccd979bdSMark Fasheh 4083d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 4084d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 4085d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 4086ccd979bdSMark Fasheh } 4087ccd979bdSMark Fasheh 4088ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 4089ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 4090ccd979bdSMark Fasheh { 4091a75e9ccaSSrinivas Eeda unsigned long flags; 4092a75e9ccaSSrinivas Eeda 4093ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 4094ccd979bdSMark Fasheh 4095ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 4096ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 4097ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 4098ccd979bdSMark Fasheh * to the resource will get it soon. */ 40999b915181SSunil Mushran mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", 4100ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 4101ccd979bdSMark Fasheh return; 4102ccd979bdSMark Fasheh } 4103ccd979bdSMark Fasheh 4104ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 4105ccd979bdSMark Fasheh 4106a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 4107ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 4108ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 4109ccd979bdSMark Fasheh &osb->blocked_lock_list); 4110ccd979bdSMark Fasheh osb->blocked_lock_count++; 4111ccd979bdSMark Fasheh } 4112a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 4113ccd979bdSMark Fasheh } 411434d024f8SMark Fasheh 411534d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 411634d024f8SMark Fasheh { 411734d024f8SMark Fasheh unsigned long processed; 4118a75e9ccaSSrinivas Eeda unsigned long 
flags; 411934d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 412034d024f8SMark Fasheh 4121a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 412234d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 412334d024f8SMark Fasheh * wake happens part-way through our work */ 412434d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 412534d024f8SMark Fasheh 412634d024f8SMark Fasheh processed = osb->blocked_lock_count; 4127209f7512SJoseph Qi /* 4128209f7512SJoseph Qi * blocked lock processing in this loop might call iput which can 4129209f7512SJoseph Qi * remove items off osb->blocked_lock_list. Downconvert up to 4130209f7512SJoseph Qi * 'processed' number of locks, but stop short if we had some 4131209f7512SJoseph Qi * removed in ocfs2_mark_lockres_freeing when downconverting. 4132209f7512SJoseph Qi */ 4133209f7512SJoseph Qi while (processed && !list_empty(&osb->blocked_lock_list)) { 413434d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 413534d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 413634d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 413734d024f8SMark Fasheh osb->blocked_lock_count--; 4138a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 413934d024f8SMark Fasheh 414034d024f8SMark Fasheh BUG_ON(!processed); 414134d024f8SMark Fasheh processed--; 414234d024f8SMark Fasheh 414334d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 414434d024f8SMark Fasheh 4145a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 414634d024f8SMark Fasheh } 4147a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 414834d024f8SMark Fasheh } 414934d024f8SMark Fasheh 415034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 415134d024f8SMark Fasheh { 415234d024f8SMark Fasheh int empty = 0; 4153a75e9ccaSSrinivas Eeda unsigned long flags; 415434d024f8SMark Fasheh 
4155a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 415634d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 415734d024f8SMark Fasheh empty = 1; 415834d024f8SMark Fasheh 4159a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 416034d024f8SMark Fasheh return empty; 416134d024f8SMark Fasheh } 416234d024f8SMark Fasheh 416334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 416434d024f8SMark Fasheh { 416534d024f8SMark Fasheh int should_wake = 0; 4166a75e9ccaSSrinivas Eeda unsigned long flags; 416734d024f8SMark Fasheh 4168a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 416934d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 417034d024f8SMark Fasheh should_wake = 1; 4171a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 417234d024f8SMark Fasheh 417334d024f8SMark Fasheh return should_wake; 417434d024f8SMark Fasheh } 417534d024f8SMark Fasheh 4176200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 417734d024f8SMark Fasheh { 417834d024f8SMark Fasheh int status = 0; 417934d024f8SMark Fasheh struct ocfs2_super *osb = arg; 418034d024f8SMark Fasheh 418134d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 418234d024f8SMark Fasheh * work available */ 418334d024f8SMark Fasheh while (!(kthread_should_stop() && 418434d024f8SMark Fasheh ocfs2_downconvert_thread_lists_empty(osb))) { 418534d024f8SMark Fasheh 418634d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 418734d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 418834d024f8SMark Fasheh kthread_should_stop()); 418934d024f8SMark Fasheh 419034d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 419134d024f8SMark Fasheh 419234d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 419334d024f8SMark Fasheh } 419434d024f8SMark Fasheh 419534d024f8SMark Fasheh osb->dc_task = NULL; 419634d024f8SMark 
Fasheh return status; 419734d024f8SMark Fasheh } 419834d024f8SMark Fasheh 419934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 420034d024f8SMark Fasheh { 4201a75e9ccaSSrinivas Eeda unsigned long flags; 4202a75e9ccaSSrinivas Eeda 4203a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 420434d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 420534d024f8SMark Fasheh * the caller may have made to the voting state */ 420634d024f8SMark Fasheh osb->dc_wake_sequence++; 4207a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 420834d024f8SMark Fasheh wake_up(&osb->dc_event); 420934d024f8SMark Fasheh } 4210